# GraphGen/graphgen/models/tokenizer/tiktoken_tokenizer.py
# github-actions[bot]
# Auto-sync from demo at Thu Oct 23 12:37:24 UTC 2025
# 8c66169
# raw
# history blame
# 455 Bytes
from typing import List
import tiktoken
from graphgen.bases import BaseTokenizer
class TiktokenTokenizer(BaseTokenizer):
    """Tokenizer that delegates encoding and decoding to a tiktoken encoding.

    The encoding object is looked up once at construction time and kept on
    ``self.enc`` for all later ``encode`` / ``decode`` calls.
    """

    def __init__(self, model_name: str = "cl100k_base"):
        """Initialise the base tokenizer and load the tiktoken encoding.

        Args:
            model_name: Name handed to the base class and then used (via
                ``self.model_name``) as the argument to
                ``tiktoken.get_encoding``. Defaults to ``"cl100k_base"``.
        """
        super().__init__(model_name)
        # NOTE(review): ``self.model_name`` is presumably set by
        # ``BaseTokenizer.__init__`` -- confirm against the base class.
        encoding = tiktoken.get_encoding(self.model_name)
        self.enc = encoding

    def encode(self, text: str) -> List[int]:
        """Return the result of encoding ``text`` with the loaded encoding."""
        token_ids = self.enc.encode(text)
        return token_ids

    def decode(self, token_ids: List[int]) -> str:
        """Return the result of decoding ``token_ids`` with the loaded encoding."""
        text = self.enc.decode(token_ids)
        return text