# Extracted from a Hugging Face Spaces page (Space status lines removed).
from transformers import AutoTokenizer
class Tokenizer:
    """Load a Hugging Face tokenizer, preferring gated Llama-2 when possible.

    If an HF access token is supplied, the gated ``meta-llama/Llama-2-7b-hf``
    tokenizer is tried first; on any failure (invalid token, no repo access,
    network error) — or when no token is given — the public ``gpt2``
    tokenizer is used instead, with a ``[PAD]`` token added since GPT-2
    ships without one.
    """

    def __init__(self, hf_token=None) -> None:
        """Initialize the wrapped tokenizer.

        Args:
            hf_token: Optional Hugging Face access token for gated models.
                When ``None`` (or falsy), only public models are used.
        """
        if hf_token:
            print(f"[INFO] Using HF token for model access")
            try:
                self.tokenizer = AutoTokenizer.from_pretrained(
                    "meta-llama/Llama-2-7b-hf", token=hf_token
                )
                # Llama path succeeded: no pad token is added here,
                # matching the original behavior (pad only on GPT-2 path).
                return
            except Exception:
                # Token may be invalid or lack access to the gated repo;
                # fall through to the public tokenizer. Narrowed from a
                # bare `except:` so SystemExit/KeyboardInterrupt propagate.
                pass
        else:
            print("[INFO] No HF token provided - using public models only")

        # Public fallback: GPT-2 is widely compatible and ungated.
        print("[INFO] Fallback to public GPT-2 tokenizer")
        self.tokenizer = AutoTokenizer.from_pretrained("gpt2")
        # GPT-2 has no pad token; add one so padded batching works.
        self.tokenizer.add_special_tokens({'pad_token': '[PAD]'})

    def ready_tokenizer(self):
        """Return the underlying tokenizer instance."""
        return self.tokenizer