VladBoyko committed on
Commit 41ff223 · verified · 1 Parent(s): c1535fc

Create app.py

Files changed (1)
app.py +198 -0
app.py ADDED
@@ -0,0 +1,198 @@
+ import gradio as gr
+ import torch
+ from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
+ import spaces
+
+ class VibeThinker:
+     def __init__(self, model_path="WeiboAI/VibeThinker-1.5B"):
+         self.model_path = model_path
+         print("Loading model... This may take a minute.")
+
+         self.model = AutoModelForCausalLM.from_pretrained(
+             self.model_path,
+             low_cpu_mem_usage=True,
+             torch_dtype=torch.bfloat16,
+             device_map="auto",
+             trust_remote_code=True
+         )
+
+         self.tokenizer = AutoTokenizer.from_pretrained(
+             self.model_path,
+             trust_remote_code=True
+         )
+
+         print("Model loaded successfully!")
+         print(f"Using device: {self.model.device}")
+         if torch.cuda.is_available():
+             print(f"CUDA device: {torch.cuda.get_device_name(0)}")
+
+     @spaces.GPU  # This decorator allocates a GPU when the function is called (for ZeroGPU Spaces)
+     def infer_text(self, prompt, temperature=0.6, max_tokens=40960, top_p=0.95):
+         """
+         Generate a response for a given prompt.
+
+         Args:
+             prompt: The input question (preferably in English)
+             temperature: Controls randomness (0.6 or 1.0 recommended)
+             max_tokens: Maximum number of tokens to generate
+             top_p: Nucleus sampling parameter
+         """
+         messages = [
+             {"role": "user", "content": prompt}
+         ]
+
+         text = self.tokenizer.apply_chat_template(
+             messages,
+             tokenize=False,
+             add_generation_prompt=True
+         )
+
+         model_inputs = self.tokenizer([text], return_tensors="pt").to(self.model.device)
+
+         generation_config = dict(
+             max_new_tokens=max_tokens,
+             do_sample=True,
+             temperature=temperature,
+             top_p=top_p,
+             top_k=None  # Set to -1 in vLLM/SGLang
+         )
+
+         print(f"Generating response with temperature={temperature}, max_tokens={max_tokens}...")
+
+         generated_ids = self.model.generate(
+             **model_inputs,
+             generation_config=GenerationConfig(**generation_config)
+         )
+
+         # Keep only the newly generated tokens, dropping the prompt
+         generated_ids = [
+             output_ids[len(input_ids):]
+             for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
+         ]
+
+         response = self.tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
+
+         return response
+
+
+ # Initialize model
+ print("Initializing VibeThinker-1.5B...")
+ model = VibeThinker()
+
+ # Create Gradio interface
+ def generate_response(prompt, temperature, max_tokens, top_p):
+     if not prompt.strip():
+         return "Please enter a question."
+
+     try:
+         response = model.infer_text(
+             prompt=prompt,
+             temperature=temperature,
+             max_tokens=max_tokens,
+             top_p=top_p
+         )
+         return response
+     except Exception as e:
+         return f"Error: {str(e)}"
+
+
+ # Gradio UI
+ with gr.Blocks(title="VibeThinker-1.5B Math & Code Reasoning") as demo:
+     gr.Markdown("""
+     # 🧠 VibeThinker-1.5B: Reasoning Model
+
+     **Optimized for**: Competitive math problems and algorithmic coding challenges
+
+     **Note**: This model works best with questions in English. It is specifically trained for
+     mathematical reasoning and competitive programming tasks.
+
+     ### Example Prompts:
+     - "Solve: Find all solutions to x^3 - 3x^2 + 4 = 0"
+     - "Write a Python function to find the longest palindromic substring in O(n^2) time"
+     - "Prove that the sum of angles in a triangle equals 180 degrees"
+
+     [GitHub](https://github.com/WeiboAI/VibeThinker) | [HuggingFace Model](https://huggingface.co/WeiboAI/VibeThinker-1.5B) | [Paper](https://huggingface.co/papers/2511.06221)
+     """)
+
+     with gr.Row():
+         with gr.Column(scale=1):
+             prompt_input = gr.Textbox(
+                 label="Your Question",
+                 placeholder="Enter your math problem or coding challenge here (in English)...",
+                 lines=5
+             )
+
+             with gr.Accordion("Advanced Settings", open=False):
+                 temperature_slider = gr.Slider(
+                     minimum=0.1,
+                     maximum=1.5,
+                     value=0.6,
+                     step=0.1,
+                     label="Temperature (0.6 or 1.0 recommended)"
+                 )
+
+                 max_tokens_slider = gr.Slider(
+                     minimum=512,
+                     maximum=40960,
+                     value=8192,
+                     step=512,
+                     label="Max Tokens (model supports up to 40,960)"
+                 )
+
+                 top_p_slider = gr.Slider(
+                     minimum=0.1,
+                     maximum=1.0,
+                     value=0.95,
+                     step=0.05,
+                     label="Top P"
+                 )
+
+             submit_btn = gr.Button("🚀 Generate Solution", variant="primary")
+             clear_btn = gr.Button("🗑️ Clear")
+
+         with gr.Column(scale=1):
+             output_text = gr.Textbox(
+                 label="Model Response",
+                 lines=20,
+                 show_copy_button=True
+             )
+
+     # Example questions
+     gr.Examples(
+         examples=[
+             ["Find the number of positive integers n ≤ 1000 such that n^2 + n + 41 is prime.", 0.6, 8192, 0.95],
+             ["Write an efficient algorithm to solve the 0-1 knapsack problem using dynamic programming.", 0.6, 8192, 0.95],
+             ["Prove that √2 is irrational using proof by contradiction.", 0.6, 8192, 0.95],
+             ["A tank can be filled by pipe A in 3 hours and pipe B in 5 hours. If both pipes are opened together, how long will it take to fill the tank?", 0.6, 8192, 0.95],
+         ],
+         inputs=[prompt_input, temperature_slider, max_tokens_slider, top_p_slider],
+         label="Example Problems"
+     )
+
+     # Event handlers
+     submit_btn.click(
+         fn=generate_response,
+         inputs=[prompt_input, temperature_slider, max_tokens_slider, top_p_slider],
+         outputs=output_text
+     )
+
+     clear_btn.click(
+         fn=lambda: ("", ""),
+         inputs=[],
+         outputs=[prompt_input, output_text]
+     )
+
+     gr.Markdown("""
+     ---
+     ### 📊 Model Performance Highlights:
+     - **AIME24**: 80.3 (vs DeepSeek R1: 79.8)
+     - **AIME25**: 74.4 (vs DeepSeek R1: 70.0)
+     - **LiveCodeBench v6**: 51.1
+     - **Parameters**: Only 1.5B (400x smaller than DeepSeek R1!)
+
+     **Training Cost**: $7,800 USD | **License**: MIT
+     """)
+
+ # Launch the app
+ if __name__ == "__main__":
+     demo.queue()  # Enable queuing for better UX
+     demo.launch()
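
For a quick sanity check, the file above can also be exercised without the UI. The following is a minimal sketch, not part of the commit: it assumes the Space's dependencies (gradio, torch, transformers, spaces) are installed, that app.py sits in the working directory, and that enough GPU memory is available for the bfloat16 weights; outside a ZeroGPU Space the @spaces.GPU decorator should simply be inert, though that depends on the installed spaces package version.

# Hypothetical smoke test; not part of the commit above.
# Importing app runs its module-level `model = VibeThinker()`,
# so we reuse that instance rather than loading the weights twice.
from app import model

answer = model.infer_text(
    "Prove that √2 is irrational using proof by contradiction.",
    temperature=0.6,   # one of the two recommended settings
    max_tokens=4096,   # smaller cap than the UI default, for a faster run
)
print(answer)

Launching the full app is just `python app.py`; the `demo.queue()` call makes concurrent requests wait in line rather than fail.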