Create make_tiny_model.py
make_tiny_model.py (new file, +58 -0)
# Adapted from https://huggingface.co/stas/tiny-random-llama-2/blob/main/make_tiny_model.py

import subprocess
import shlex

from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer


mname_from = "Qwen/Qwen1.5-MoE-A2.7B"
mname_tiny = "peft-internal-testing/tiny-random-qwen-1.5-MoE"
vocab_keep_items = 3000

config = AutoConfig.from_pretrained(mname_from)
# print("orig config", config)
config.update(
    dict(
        hidden_size=16,
        intermediate_size=64,
        num_attention_heads=4,
        num_hidden_layers=2,
        max_position_embeddings=256,
        num_key_value_heads=4,
        vocab_size=vocab_keep_items,
        num_experts=4,
        num_experts_per_tok=2,
    )
)
print("new config", config)

# create a tiny model with random weights from the shrunken config
tiny_model = AutoModelForCausalLM.from_config(config)
print(f"num of params {tiny_model.num_parameters()}")

# halve the checkpoint size by casting to bfloat16, then save
tiny_model.bfloat16()
tiny_model.save_pretrained(mname_tiny)

# shrink the tokenizer down to a 3k vocab
tokenizer_fast = AutoTokenizer.from_pretrained(mname_from)
tmp_dir = f"/tmp/{mname_from}"
tokenizer_fast.save_pretrained(tmp_dir)
# resize tokenizer.json (the companion vocab files are rewritten automatically on save_pretrained)
# perl -0777 -pi -e 's|(2999).*|$1},"merges": []}}|msg' tokenizer.json  # 0-indexed, so vocab_keep_items-1!
# i.e. cut everything after token id 2999 and close the JSON with an empty merges list
closing_pat = '},"merges": []}}'
cmd = f"perl -0777 -pi -e 's|({vocab_keep_items - 1}).*|$1{closing_pat}|msg' {tmp_dir}/tokenizer.json"
# print(f"Running:\n{cmd}")
result = subprocess.run(shlex.split(cmd), capture_output=True, text=True)
# print(result)
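
# the same truncation without the perl dependency (a sketch, assuming the
# standard BPE tokenizer.json layout with "model.vocab" and "model.merges"):
#
#   import json
#   path = f"{tmp_dir}/tokenizer.json"
#   with open(path) as f:
#       tok = json.load(f)
#   tok["model"]["vocab"] = {t: i for t, i in tok["model"]["vocab"].items() if i < vocab_keep_items}
#   tok["model"]["merges"] = []
#   with open(path, "w") as f:
#       json.dump(tok, f, ensure_ascii=False)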

# reload with the modified tokenizer
tokenizer_fast_tiny = AutoTokenizer.from_pretrained(tmp_dir)
tokenizer_fast_tiny.save_pretrained(mname_tiny)

# smoke-test that the new model and tokenizer work together
model_inputs = tokenizer_fast_tiny("Making tiny model", return_tensors="pt")
gen_tokens = tiny_model.generate(**model_inputs, max_new_tokens=100)
print(tokenizer_fast_tiny.batch_decode(gen_tokens, skip_special_tokens=True))
print("Random output is expected, but there should be no crash")

print(f"Model+Tokenizer saved in {mname_tiny}")
|