File size: 458 Bytes
049b491
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
from transformers import AutoTokenizer

class DeepseekOCRTokenizer:
    """Thin adapter that serves an existing tokenizer checkpoint under the
    custom ``deepseekocr`` model_type.

    NOTE(review): positional ``args`` are forwarded *after* the hardcoded
    checkpoint name, so any caller-supplied positional path would reach
    ``AutoTokenizer.from_pretrained`` as ``*inputs`` rather than as the
    checkpoint — confirm callers only pass keyword arguments.
    """

    @classmethod
    def from_pretrained(cls, *args, **kwargs):
        """Load a tokenizer from the fixed base checkpoint.

        All positional and keyword arguments are forwarded to
        ``AutoTokenizer.from_pretrained`` after the checkpoint name.
        Swap ``base_checkpoint`` if the tokenizer originated elsewhere.
        """
        base_checkpoint = "deepseek-ai/deepseek-coder"
        return AutoTokenizer.from_pretrained(base_checkpoint, *args, **kwargs)