Javedalam committed
Commit 55dbe5b · verified · 1 parent: f7faccc

Create app.py

Files changed (1)
  app.py +171 -0
app.py ADDED
@@ -0,0 +1,171 @@
+ import torch
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+ import gradio as gr
+
+ MODEL_ID = "qvac/genesis-i-model"
+
+ # ----------------------
+ # Load tokenizer & model
+ # ----------------------
+ print("Loading tokenizer...")
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
+
+ # Make sure we have a pad token
+ if tokenizer.pad_token is None and tokenizer.eos_token is not None:
+     tokenizer.pad_token = tokenizer.eos_token
+
+ print("Detecting device & dtype...")
+ if torch.cuda.is_available():
+     # On ZeroGPU / real GPU: use bf16 if supported, else fp16
+     try:
+         bf16_ok = torch.cuda.is_bf16_supported()
+     except AttributeError:
+         bf16_ok = False
+
+     torch_dtype = torch.bfloat16 if bf16_ok else torch.float16
+     device_map = "auto"
+ else:
+     # CPU fallback
+     torch_dtype = torch.float32
+     device_map = "cpu"
+
+ print(f"Loading model on {device_map} with dtype={torch_dtype}...")
+ model = AutoModelForCausalLM.from_pretrained(
+     MODEL_ID,
+     torch_dtype=torch_dtype,
+     device_map=device_map,
+ )
+ model.eval()
+
+ # ----------------------
+ # Helper: build chat input
+ # ----------------------
+ def build_inputs(prompt: str):
+     """
+     Build input_ids using the model's chat template.
+     We pass it a simple system + user conversation and ask the
+     tokenizer to append the assistant generation prompt.
+     """
+     messages = [
+         {
+             "role": "system",
+             "content": (
+                 "You are an educational AI tutor. "
+                 "Explain clearly and precisely, focusing on math, science, "
+                 "engineering, programming, and medical education. "
+                 "Show intermediate steps when useful, but avoid rambling."
+             ),
+         },
+         {
+             "role": "user",
+             "content": prompt,
+         },
+     ]
+
+     input_ids = tokenizer.apply_chat_template(
+         messages,
+         add_generation_prompt=True,
+         return_tensors="pt",
+     )
+
+     return input_ids.to(model.device)
+
+ # ----------------------
+ # Generation function
+ # ----------------------
+ def generate(
+     prompt: str,
+     temperature: float = 0.7,
+     top_p: float = 0.9,
+     max_new_tokens: int = 256,
+ ):
+     if not prompt.strip():
+         return "Please enter a prompt."
+
+     input_ids = build_inputs(prompt)
+
+     with torch.no_grad():
+         output_ids = model.generate(
+             input_ids=input_ids,
+             max_new_tokens=max_new_tokens,
+             do_sample=True,
+             temperature=temperature,
+             top_p=top_p,
+             repetition_penalty=1.1,  # light anti-repetition
+             pad_token_id=tokenizer.pad_token_id,
+             eos_token_id=tokenizer.eos_token_id,
+         )
+
+     # Keep only the newly generated tokens (the assistant part)
+     new_tokens = output_ids[0, input_ids.shape[-1]:]
+     text = tokenizer.decode(new_tokens, skip_special_tokens=True)
+     text = text.strip()
+
+     if not text:
+         text = "[Empty response]"
+     return text
+
+ # ----------------------
+ # Gradio UI
+ # ----------------------
+ with gr.Blocks() as demo:
+     gr.Markdown(
+         """
+         # QVAC Genesis I – Educational LLM Demo
+
+         Model: **qvac/genesis-i-model**
+         Trained on the QVAC Genesis I synthetic educational dataset (STEM-heavy).
+         Ask it math, science, engineering, or medical education questions.
+         """
+     )
+
+     with gr.Row():
+         with gr.Column(scale=3):
+             prompt = gr.Textbox(
+                 label="Prompt",
+                 placeholder="Example: Explain why 2 + 2 = 4 in a way a 10-year-old can understand.",
+                 lines=6,
+             )
+             temperature = gr.Slider(
+                 minimum=0.1,
+                 maximum=1.2,
+                 value=0.7,
+                 step=0.05,
+                 label="Temperature (creativity)",
+             )
+             top_p = gr.Slider(
+                 minimum=0.1,
+                 maximum=1.0,
+                 value=0.9,
+                 step=0.05,
+                 label="Top-p (nucleus sampling)",
+             )
+             max_new_tokens = gr.Slider(
+                 minimum=16,
+                 maximum=512,
+                 value=256,
+                 step=16,
+                 label="Max new tokens",
+             )
+             submit = gr.Button("Generate")
+
+         with gr.Column(scale=4):
+             output = gr.Textbox(
+                 label="Model output",
+                 lines=18,
+             )
+
+     submit.click(
+         fn=generate,
+         inputs=[prompt, temperature, top_p, max_new_tokens],
+         outputs=output,
+     )
+
+     # Press Enter in the prompt box to generate
+     prompt.submit(
+         fn=generate,
+         inputs=[prompt, temperature, top_p, max_new_tokens],
+         outputs=output,
+     )
+
+ demo.queue().launch()
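
Note: the click handler above blocks until generate() returns, so the output box only fills in once generation is complete. A streaming variant is possible with transformers' TextIteratorStreamer; the sketch below reuses the model, tokenizer, and build_inputs objects from app.py, and generate_stream is a hypothetical helper, not part of this commit.

# Sketch only: streaming counterpart to generate(), assuming the
# model / tokenizer / build_inputs objects defined in app.py above.
from threading import Thread

from transformers import TextIteratorStreamer


def generate_stream(prompt, temperature=0.7, top_p=0.9, max_new_tokens=256):
    input_ids = build_inputs(prompt)
    # skip_prompt=True so the streamer yields only newly generated text
    streamer = TextIteratorStreamer(
        tokenizer, skip_prompt=True, skip_special_tokens=True
    )
    gen_kwargs = dict(
        input_ids=input_ids,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
        streamer=streamer,
    )
    # model.generate() blocks, so run it in a background thread
    # while this function consumes the streamer
    Thread(target=model.generate, kwargs=gen_kwargs).start()
    text = ""
    for chunk in streamer:
        text += chunk
        yield text  # Gradio renders each yielded value as a partial update

Wiring it in would only change the event handlers (submit.click(fn=generate_stream, ...)); Gradio treats generator functions as streaming updates to the output component.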
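
One caveat hinted at by the bf16 comment: on a Hugging Face ZeroGPU Space, a GPU is attached per call rather than for the whole process, and GPU-bound functions are wrapped with the spaces.GPU decorator from the spaces package. A minimal sketch of that adaptation, assuming the Space declares the spaces dependency:

# Sketch only: ZeroGPU adaptation. On ZeroGPU Spaces a GPU is
# allocated per decorated call, not for the process lifetime.
import spaces


@spaces.GPU  # request a GPU for the duration of each call
def generate(prompt, temperature=0.7, top_p=0.9, max_new_tokens=256):
    ...  # same body as generate() in app.py above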