import os

from transformers import AutoTokenizer


class Tokenizer:

    def __init__(self, hf_token=None) -> None:
        # Fall back to the standard HF_TOKEN environment variable if no token is passed in
        hf_token = hf_token or os.environ.get("HF_TOKEN")

        if hf_token:
            print("[INFO] Using HF token for model access")
        else:
            print("[INFO] No HF token provided - using public models only")

        # Prefer the gated Llama tokenizer when a token is available; otherwise
        # fall back to the public GPT-2 tokenizer, which requires no authentication.
        self.tokenizer = None
        if hf_token:
            try:
                self.tokenizer = AutoTokenizer.from_pretrained(
                    "meta-llama/Llama-2-7b-hf", token=hf_token
                )
            except Exception as exc:
                print(f"[WARN] Could not load Llama tokenizer ({exc})")

        if self.tokenizer is None:
            print("[INFO] Falling back to public GPT-2 tokenizer")
            self.tokenizer = AutoTokenizer.from_pretrained("gpt2")

        # GPT-2 (and some other tokenizers) ship without a padding token, so add one.
        # Any model paired with this tokenizer must resize its embeddings accordingly.
        self.tokenizer.add_special_tokens({'pad_token': '[PAD]'})

    def ready_tokenizer(self):
        # Return the configured tokenizer instance
        return self.tokenizer
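

# Illustrative usage sketch (not part of the original file): shows how the
# Tokenizer wrapper might be exercised. The sample text and max_length value
# are assumptions chosen for demonstration only.
if __name__ == "__main__":
    wrapper = Tokenizer()  # picks up HF_TOKEN from the environment if set
    tokenizer = wrapper.ready_tokenizer()

    # Encode a sample sentence with padding and truncation enabled
    batch = tokenizer(
        "Hello, tokenizer!",
        padding="max_length",
        truncation=True,
        max_length=16,
    )
    print(batch["input_ids"])  # list of token ids, padded to length 16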