Jayashree Sridhar committed on
Commit
e93f267
·
1 Parent(s): 292f6f6

Added TinyGPT2Model file

Files changed (1)
  1. models/tinygpt2_model.py +99 -0
models/tinygpt2_model.py ADDED
@@ -0,0 +1,99 @@
+ """
+ TinyGPT2 Model Wrapper for easy integration (CPU-friendly)
+ """
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+ import torch
+ import os
+ HUGGINGFACE_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
+ class TinyGPT2Model:
+     """
+     Wrapper for the sshleifer/tiny-gpt2 model with caching and optimization.
+     Suitable for CPU-only Hugging Face Spaces.
+     """
+     _instance = None
+     _model = None
+     _tokenizer = None
+
+     def __new__(cls):
+         if cls._instance is None:
+             cls._instance = super().__new__(cls)
+         return cls._instance
+
+     def __init__(self):
+         if TinyGPT2Model._model is None:
+             self._initialize_model()
+
+     def _initialize_model(self):
+         """Initialize the tiny-gpt2 model and tokenizer."""
+         print("Loading TinyGPT2 model...")
+
+         model_id = "sshleifer/tiny-gpt2"
+
+         # Load tokenizer (the model is public, so the token is optional and may be None)
+         TinyGPT2Model._tokenizer = AutoTokenizer.from_pretrained(model_id, token=HUGGINGFACE_TOKEN)
+
+         # Load model (no quantization, pure CPU)
+         TinyGPT2Model._model = AutoModelForCausalLM.from_pretrained(
+             model_id, token=HUGGINGFACE_TOKEN,
+             torch_dtype=torch.float32  # Safe for CPU-only inference
+         )
+
+         print("TinyGPT2 model loaded successfully!")
+
+     def generate(
+         self,
+         prompt: str,
+         max_length: int = 64,
+         temperature: float = 0.7,
+         top_p: float = 0.95
+     ) -> str:
+         """Generate a response from TinyGPT2."""
+
+         # For TinyGPT2, no special prompt formatting is needed
+         formatted_prompt = prompt
+
+         # Tokenize
+         inputs = TinyGPT2Model._tokenizer(
+             formatted_prompt,
+             return_tensors="pt",
+             truncation=True,
+             max_length=256
+         )
+
+         # Move to CPU (optional, for explicitness)
+         inputs = {k: v.cpu() for k, v in inputs.items()}
+
+         # Generate on CPU
+         with torch.no_grad():
+             outputs = TinyGPT2Model._model.generate(
+                 **inputs,
+                 max_new_tokens=max_length,
+                 temperature=temperature,
+                 top_p=top_p,
+                 do_sample=True,
+                 pad_token_id=TinyGPT2Model._tokenizer.eos_token_id
+             )
+
+         # Decode only the newly generated tokens (after the prompt)
+         response = TinyGPT2Model._tokenizer.decode(
+             outputs[0][inputs['input_ids'].shape[1]:],
+             skip_special_tokens=True
+         )
+
+         return response.strip()
+
+     def generate_embedding(self, text: str) -> torch.Tensor:
+         """Generate an embedding for text by mean-pooling the last hidden state."""
+         inputs = TinyGPT2Model._tokenizer(
+             text,
+             return_tensors="pt",
+             truncation=True,
+             max_length=256
+         )
+         inputs = {k: v.cpu() for k, v in inputs.items()}
+
+         with torch.no_grad():
+             outputs = TinyGPT2Model._model(**inputs, output_hidden_states=True)
+             embeddings = outputs.hidden_states[-1].mean(dim=1)
+
+         return embeddings