Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -24,14 +24,29 @@ CACHE_DIR = "./model_cache"
|
|
| 24 |
# Download model files
|
| 25 |
config_path = hf_hub_download(MODEL_REPO, "config.json", cache_dir=CACHE_DIR)
|
| 26 |
model_path = hf_hub_download(MODEL_REPO, "model.keras", cache_dir=CACHE_DIR)
|
| 27 |
-
tokenizer_path = hf_hub_download(MODEL_REPO, "tokenizer.json", cache_dir=CACHE_DIR)
|
| 28 |
|
| 29 |
# Load config
|
| 30 |
with open(config_path, 'r') as f:
|
| 31 |
config = json.load(f)
|
| 32 |
|
| 33 |
-
#
|
| 34 |
-
tokenizer
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
eos_token_id = config.get('eos_token_id', 50256)
|
| 36 |
|
| 37 |
# Load model with TF function optimization
|
|
|
|
| 24 |
# Download model files
|
| 25 |
config_path = hf_hub_download(MODEL_REPO, "config.json", cache_dir=CACHE_DIR)
|
| 26 |
model_path = hf_hub_download(MODEL_REPO, "model.keras", cache_dir=CACHE_DIR)
|
|
|
|
| 27 |
|
| 28 |
# Load config
|
| 29 |
with open(config_path, 'r') as f:
|
| 30 |
config = json.load(f)
|
| 31 |
|
| 32 |
+
# Create tokenizer from scratch
|
| 33 |
+
print("📦 Creating tokenizer from GPT-2 base...")
|
| 34 |
+
from transformers import AutoTokenizer
|
| 35 |
+
|
| 36 |
+
hf_tokenizer = AutoTokenizer.from_pretrained("gpt2")
|
| 37 |
+
|
| 38 |
+
# Add custom tokens
|
| 39 |
+
custom_tokens = ["<|im_start|>", "<|im_end|>"]
|
| 40 |
+
hf_tokenizer.add_special_tokens({"additional_special_tokens": custom_tokens})
|
| 41 |
+
|
| 42 |
+
# Save and reload as tokenizers format
|
| 43 |
+
os.makedirs("./temp_tokenizer", exist_ok=True)
|
| 44 |
+
hf_tokenizer.save_pretrained("./temp_tokenizer")
|
| 45 |
+
tokenizer = Tokenizer.from_file("./temp_tokenizer/tokenizer.json")
|
| 46 |
+
|
| 47 |
+
print(f"✅ Tokenizer created with vocab size: {tokenizer.get_vocab_size()}")
|
| 48 |
+
print(f" Custom tokens: {custom_tokens}")
|
| 49 |
+
|
| 50 |
eos_token_id = config.get('eos_token_id', 50256)
|
| 51 |
|
| 52 |
# Load model with TF function optimization
|