VladBoyko committed on
Commit
a271ec8
·
verified ·
1 Parent(s): 6368a59

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +100 -47
app.py CHANGED
@@ -20,19 +20,19 @@ class VibeThinkerVLLM:
20
  model="WeiboAI/VibeThinker-1.5B",
21
  dtype="float16", # Use float16 instead of bfloat16 for T4
22
  gpu_memory_utilization=0.85,
23
- max_model_len=16384, # Reduced from 40960 for T4 stability
24
  enforce_eager=True, # Disable CUDA graphs for T4
25
  disable_custom_all_reduce=True, # Avoid custom kernels
26
  enable_prefix_caching=False, # Disable for stability
27
  max_num_seqs=1, # Process one sequence at a time
28
  trust_remote_code=True
29
  )
30
- print("✅ vLLM model loaded successfully with T4-compatible settings")
31
  except Exception as e:
32
  print(f"❌ Error loading model: {e}")
33
  raise
34
 
35
- def generate_response(self, prompt, temperature=0.6, max_tokens=8192, max_thinking_tokens=4096):
36
  """
37
  Generate response with thinking length control and loop detection
38
 
@@ -106,7 +106,6 @@ Keep reasoning under {max_thinking_tokens} tokens. DO NOT repeat yourself.<|im_e
106
 
107
  def _detect_loop(self, text):
108
  """Detect if text contains repetitive loops"""
109
- # Check for repeated phrases
110
  words = text.split()
111
  if len(words) < 20:
112
  return False
@@ -129,71 +128,84 @@ Keep reasoning under {max_thinking_tokens} tokens. DO NOT repeat yourself.<|im_e
129
  rest_start = i + length
130
  rest = ' '.join(words[rest_start:])
131
  if phrase in rest:
132
- # Truncate at first repetition
133
  return ' '.join(words[:rest_start])
134
  return text
135
 
136
  def parse_model_output(text):
137
  """
138
  Parse model output to separate thinking and final answer
 
139
  Returns: (thinking_content, answer_content, code_blocks)
140
  """
141
- # Remove common loop warning
142
  loop_warning = ""
143
  if "[Loop detected and truncated]" in text:
144
  loop_warning = "\n\n⚠️ **Note**: Repetitive content was detected and removed"
145
  text = text.replace("⚠️ *[Loop detected and truncated]*", "")
146
 
147
- # Try to find thinking section (common patterns)
148
  thinking_patterns = [
149
  r'<think>(.*?)</think>',
150
  r'<thinking>(.*?)</thinking>',
151
- r'(?:Let me think|Let\'s think|Analysis):(.*?)(?=\n\n[SA-Z]|Solution:|Code:|```|\Z)',
152
  ]
153
 
154
  thinking_content = ""
 
 
155
  for pattern in thinking_patterns:
156
  match = re.search(pattern, text, re.DOTALL | re.IGNORECASE)
157
  if match:
158
  thinking_content = match.group(1).strip()
159
- text = re.sub(pattern, '', text, flags=re.DOTALL | re.IGNORECASE)
160
  break
161
 
162
- # If no explicit thinking, extract first paragraph if it's analytical
 
163
  if not thinking_content:
164
- paragraphs = text.split('\n\n')
165
- if paragraphs and len(paragraphs[0]) > 50 and len(paragraphs[0]) < 500:
166
- first_para = paragraphs[0].lower()
167
- if any(word in first_para for word in ['approach', 'step', 'first', 'algorithm', 'solution']):
168
- thinking_content = paragraphs[0]
169
- text = '\n\n'.join(paragraphs[1:])
 
 
 
 
 
 
 
 
 
 
170
 
171
- # Extract code blocks
172
  code_pattern = r'```(\w+)?\n(.*?)```'
173
- code_blocks = re.findall(code_pattern, text, re.DOTALL)
174
 
175
- # Extract final answer (boxed or explicit)
176
- answer_match = re.search(r'\\boxed\{([^}]+)\}', text)
177
  if answer_match:
178
- answer_content = f"**Final Answer:** {answer_match.group(1)}"
179
  else:
180
- answer_content = text.strip()
181
 
182
- answer_content += loop_warning
183
 
184
- return thinking_content, answer_content, code_blocks
185
 
186
  def format_output_html(thinking, answer, code_blocks, prompt_tokens, completion_tokens, generation_time):
187
  """
188
- Format output as styled HTML with good contrast and modern design
189
  """
190
  total_tokens = prompt_tokens + completion_tokens
191
  thinking_tokens_est = len(thinking.split()) * 1.3 if thinking else 0
192
  tokens_per_sec = completion_tokens / generation_time if generation_time > 0 else 0
193
 
194
- # Build thinking section HTML if exists
195
  thinking_html = ""
196
  if thinking:
 
 
197
  thinking_html = f"""
198
  <details style="background: #f8f9fa; border: 2px solid #e9ecef; border-radius: 12px; padding: 20px; margin-bottom: 24px; box-shadow: 0 2px 4px rgba(0,0,0,0.05);">
199
  <summary style="cursor: pointer; font-weight: 600; font-size: 16px; color: #495057; user-select: none; display: flex; align-items: center; gap: 8px;">
@@ -202,29 +214,42 @@ def format_output_html(thinking, answer, code_blocks, prompt_tokens, completion_
202
  <span style="margin-left: auto; font-size: 12px; color: #6c757d;">Click to expand/collapse</span>
203
  </summary>
204
  <div style="margin-top: 16px; padding-top: 16px; border-top: 1px solid #dee2e6; color: #212529; line-height: 1.7; white-space: pre-wrap; font-size: 14px; font-family: 'SF Mono', Monaco, Consolas, monospace; background: #ffffff; padding: 16px; border-radius: 8px;">
205
- {thinking}
206
  </div>
207
  </details>
208
  """
209
 
210
- # Build code blocks HTML if exist
211
  code_html = ""
212
  if code_blocks:
213
  code_blocks_html = ""
214
- for lang, code in code_blocks:
215
  lang_display = lang if lang else "code"
 
 
 
 
 
216
  code_blocks_html += f"""
217
  <div style="margin-bottom: 16px; background: #1e1e1e; border-radius: 12px; overflow: hidden; box-shadow: 0 4px 6px rgba(0,0,0,0.1);">
218
  <div style="background: #2d2d2d; padding: 12px 20px; color: #ffffff; font-weight: 600; font-size: 13px; display: flex; justify-content: space-between; align-items: center; border-bottom: 1px solid #3d3d3d;">
219
  <span>{lang_display}</span>
220
- <button onclick="navigator.clipboard.writeText(this.parentElement.nextElementSibling.textContent)"
221
- style="background: #4CAF50; color: white; border: none; padding: 6px 14px; border-radius: 6px; cursor: pointer; font-size: 12px; font-weight: 500; transition: background 0.2s;"
222
- onmouseover="this.style.background='#45a049'"
223
- onmouseout="this.style.background='#4CAF50'">
224
- 📋 Copy
225
- </button>
 
 
 
 
 
 
 
 
226
  </div>
227
- <pre style="margin: 0; padding: 20px; color: #d4d4d4; overflow-x: auto; font-family: 'SF Mono', Monaco, Consolas, monospace; font-size: 14px; line-height: 1.6;"><code>{code.strip()}</code></pre>
228
  </div>
229
  """
230
 
@@ -235,6 +260,35 @@ def format_output_html(thinking, answer, code_blocks, prompt_tokens, completion_
235
  </h3>
236
  {code_blocks_html}
237
  </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
238
  """
239
 
240
  html = f"""
@@ -271,7 +325,7 @@ def format_output_html(thinking, answer, code_blocks, prompt_tokens, completion_
271
  </div>
272
  </div>
273
 
274
- <!-- Thinking Section -->
275
  {thinking_html}
276
 
277
  <!-- Answer Section -->
@@ -284,7 +338,7 @@ def format_output_html(thinking, answer, code_blocks, prompt_tokens, completion_
284
  </div>
285
  </div>
286
 
287
- <!-- Code Blocks -->
288
  {code_html}
289
 
290
  </div>
@@ -292,18 +346,16 @@ def format_output_html(thinking, answer, code_blocks, prompt_tokens, completion_
292
  return html
293
 
294
  # Initialize model
295
- print("🔄 Initializing VibeThinker with vLLM (T4-optimized)...")
296
  vibe_model = VibeThinkerVLLM()
297
 
298
- def generate_solution(prompt, temperature=0.6, max_tokens=8192, max_thinking_tokens=4096, progress=gr.Progress()):
299
  """Generate and format solution with progress tracking"""
300
  if not prompt.strip():
301
  return "<p style='color: #dc3545; font-size: 16px; padding: 20px;'>⚠️ Please enter a problem to solve.</p>"
302
 
303
- # Show progress
304
  progress(0, desc="🔄 Initializing generation...")
305
 
306
- # Generate response
307
  progress(0.3, desc="🧠 Model is thinking...")
308
  response, prompt_tokens, completion_tokens, gen_time = vibe_model.generate_response(
309
  prompt,
@@ -314,7 +366,7 @@ def generate_solution(prompt, temperature=0.6, max_tokens=8192, max_thinking_tok
314
 
315
  progress(0.8, desc="📝 Formatting output...")
316
 
317
- # Parse output
318
  thinking, answer, code_blocks = parse_model_output(response)
319
 
320
  # Format as HTML
@@ -341,7 +393,7 @@ with gr.Blocks(
341
 
342
  **Optimized for**: Competitive programming (LeetCode, Codeforces, AtCoder) and algorithm challenges
343
 
344
- ⚡ **Powered by vLLM** | 🎯 **Best for**: Python algorithmic problems with clear input/output specs
345
 
346
  ⚠️ **Note**: This model is specialized for competitive programming, not general software development
347
  """)
@@ -364,10 +416,10 @@ with gr.Blocks(
364
  )
365
  max_tokens_slider = gr.Slider(
366
  minimum=1024,
367
- maximum=16384,
368
- value=8192,
369
  step=1024,
370
- label="📝 Max Total Tokens"
371
  )
372
  max_thinking_slider = gr.Slider(
373
  minimum=512,
@@ -383,6 +435,7 @@ with gr.Blocks(
383
  - Higher thinking tokens (4096-8192) for complex problems requiring detailed reasoning
384
  - Temperature 0.6 balances creativity and accuracy
385
  - Loop detection is automatic - repetitive output will be truncated
 
386
  """)
387
 
388
  generate_btn = gr.Button("🚀 Generate Solution", variant="primary", size="lg")
 
20
  model="WeiboAI/VibeThinker-1.5B",
21
  dtype="float16", # Use float16 instead of bfloat16 for T4
22
  gpu_memory_utilization=0.85,
23
+ max_model_len=40960, # Full 40K context as per docs
24
  enforce_eager=True, # Disable CUDA graphs for T4
25
  disable_custom_all_reduce=True, # Avoid custom kernels
26
  enable_prefix_caching=False, # Disable for stability
27
  max_num_seqs=1, # Process one sequence at a time
28
  trust_remote_code=True
29
  )
30
+ print("✅ vLLM model loaded successfully with T4-compatible settings (40K context)")
31
  except Exception as e:
32
  print(f"❌ Error loading model: {e}")
33
  raise
34
 
35
+ def generate_response(self, prompt, temperature=0.6, max_tokens=16384, max_thinking_tokens=4096):
36
  """
37
  Generate response with thinking length control and loop detection
38
 
 
106
 
107
  def _detect_loop(self, text):
108
  """Detect if text contains repetitive loops"""
 
109
  words = text.split()
110
  if len(words) < 20:
111
  return False
 
128
  rest_start = i + length
129
  rest = ' '.join(words[rest_start:])
130
  if phrase in rest:
 
131
  return ' '.join(words[:rest_start])
132
  return text
133
 
134
  def parse_model_output(text):
135
  """
136
  Parse model output to separate thinking and final answer
137
+ ONLY extract code from the final answer section, not from thinking
138
  Returns: (thinking_content, answer_content, code_blocks)
139
  """
 
140
  loop_warning = ""
141
  if "[Loop detected and truncated]" in text:
142
  loop_warning = "\n\n⚠️ **Note**: Repetitive content was detected and removed"
143
  text = text.replace("⚠️ *[Loop detected and truncated]*", "")
144
 
145
+ # Try to find explicit thinking delimiters
146
  thinking_patterns = [
147
  r'<think>(.*?)</think>',
148
  r'<thinking>(.*?)</thinking>',
 
149
  ]
150
 
151
  thinking_content = ""
152
+ answer_content = text
153
+
154
  for pattern in thinking_patterns:
155
  match = re.search(pattern, text, re.DOTALL | re.IGNORECASE)
156
  if match:
157
  thinking_content = match.group(1).strip()
158
+ answer_content = re.sub(pattern, '', text, flags=re.DOTALL | re.IGNORECASE).strip()
159
  break
160
 
161
+ # If no explicit thinking tags, try to detect reasoning section
162
+ # Look for a natural break like "Solution:" or "Here's the code:"
163
  if not thinking_content:
164
+ split_markers = [
165
+ r'(.*?)(?=\n\n(?:Solution|Here\'s|Implementation|Code|Final).*?:)',
166
+ r'(.*?)(?=\n\n```)', # Before first code block
167
+ ]
168
+
169
+ for pattern in split_markers:
170
+ match = re.search(pattern, text, re.DOTALL | re.IGNORECASE)
171
+ if match:
172
+ potential_thinking = match.group(1).strip()
173
+ # Only treat as thinking if it's substantial (>100 chars) and contains reasoning keywords
174
+ if len(potential_thinking) > 100:
175
+ thinking_lower = potential_thinking.lower()
176
+ if any(word in thinking_lower for word in ['step', 'approach', 'idea', 'first', 'we can', 'let\'s']):
177
+ thinking_content = potential_thinking
178
+ answer_content = text[len(potential_thinking):].strip()
179
+ break
180
 
181
+ # NOW extract code blocks ONLY from answer_content (not from thinking)
182
  code_pattern = r'```(\w+)?\n(.*?)```'
183
+ code_blocks = re.findall(code_pattern, answer_content, re.DOTALL)
184
 
185
+ # Extract final answer (boxed or use answer_content as-is)
186
+ answer_match = re.search(r'\\boxed\{([^}]+)\}', answer_content)
187
  if answer_match:
188
+ final_answer = f"**Final Answer:** {answer_match.group(1)}"
189
  else:
190
+ final_answer = answer_content
191
 
192
+ final_answer += loop_warning
193
 
194
+ return thinking_content, final_answer, code_blocks
195
 
196
  def format_output_html(thinking, answer, code_blocks, prompt_tokens, completion_tokens, generation_time):
197
  """
198
+ Format output as styled HTML - thinking is plain text, code blocks are from final answer only
199
  """
200
  total_tokens = prompt_tokens + completion_tokens
201
  thinking_tokens_est = len(thinking.split()) * 1.3 if thinking else 0
202
  tokens_per_sec = completion_tokens / generation_time if generation_time > 0 else 0
203
 
204
+ # Build thinking section HTML - PLAIN TEXT, NO CODE PARSING
205
  thinking_html = ""
206
  if thinking:
207
+ # Escape any HTML in thinking to prevent rendering
208
+ thinking_escaped = thinking.replace('<', '&lt;').replace('>', '&gt;')
209
  thinking_html = f"""
210
  <details style="background: #f8f9fa; border: 2px solid #e9ecef; border-radius: 12px; padding: 20px; margin-bottom: 24px; box-shadow: 0 2px 4px rgba(0,0,0,0.05);">
211
  <summary style="cursor: pointer; font-weight: 600; font-size: 16px; color: #495057; user-select: none; display: flex; align-items: center; gap: 8px;">
 
214
  <span style="margin-left: auto; font-size: 12px; color: #6c757d;">Click to expand/collapse</span>
215
  </summary>
216
  <div style="margin-top: 16px; padding-top: 16px; border-top: 1px solid #dee2e6; color: #212529; line-height: 1.7; white-space: pre-wrap; font-size: 14px; font-family: 'SF Mono', Monaco, Consolas, monospace; background: #ffffff; padding: 16px; border-radius: 8px;">
217
+ {thinking_escaped}
218
  </div>
219
  </details>
220
  """
221
 
222
+ # Build code blocks HTML - ONLY from final answer
223
  code_html = ""
224
  if code_blocks:
225
  code_blocks_html = ""
226
+ for idx, (lang, code) in enumerate(code_blocks):
227
  lang_display = lang if lang else "code"
228
+ code_id = f"code_{idx}"
229
+
230
+ # Create downloadable version
231
+ code_clean = code.strip()
232
+
233
  code_blocks_html += f"""
234
  <div style="margin-bottom: 16px; background: #1e1e1e; border-radius: 12px; overflow: hidden; box-shadow: 0 4px 6px rgba(0,0,0,0.1);">
235
  <div style="background: #2d2d2d; padding: 12px 20px; color: #ffffff; font-weight: 600; font-size: 13px; display: flex; justify-content: space-between; align-items: center; border-bottom: 1px solid #3d3d3d;">
236
  <span>{lang_display}</span>
237
+ <div style="display: flex; gap: 8px;">
238
+ <button onclick="navigator.clipboard.writeText(document.getElementById('{code_id}').textContent)"
239
+ style="background: #4CAF50; color: white; border: none; padding: 6px 14px; border-radius: 6px; cursor: pointer; font-size: 12px; font-weight: 500; transition: background 0.2s;"
240
+ onmouseover="this.style.background='#45a049'"
241
+ onmouseout="this.style.background='#4CAF50'">
242
+ 📋 Copy
243
+ </button>
244
+ <button onclick="downloadCode(document.getElementById('{code_id}').textContent, '{lang_display}')"
245
+ style="background: #2196F3; color: white; border: none; padding: 6px 14px; border-radius: 6px; cursor: pointer; font-size: 12px; font-weight: 500; transition: background 0.2s;"
246
+ onmouseover="this.style.background='#0b7dda'"
247
+ onmouseout="this.style.background='#2196F3'">
248
+ 💾 Download
249
+ </button>
250
+ </div>
251
  </div>
252
+ <pre style="margin: 0; padding: 20px; color: #d4d4d4; overflow-x: auto; font-family: 'SF Mono', Monaco, Consolas, monospace; font-size: 14px; line-height: 1.6;"><code id="{code_id}">{code_clean}</code></pre>
253
  </div>
254
  """
255
 
 
260
  </h3>
261
  {code_blocks_html}
262
  </div>
263
+
264
+ <script>
265
+ function downloadCode(code, lang) {{
266
+ const extensions = {{
267
+ 'python': 'py',
268
+ 'javascript': 'js',
269
+ 'java': 'java',
270
+ 'cpp': 'cpp',
271
+ 'c': 'c',
272
+ 'html': 'html',
273
+ 'css': 'css',
274
+ 'typescript': 'ts',
275
+ 'rust': 'rs',
276
+ 'go': 'go',
277
+ }};
278
+ const ext = extensions[lang.toLowerCase()] || 'txt';
279
+ const filename = `solution.${{ext}}`;
280
+
281
+ const blob = new Blob([code], {{ type: 'text/plain' }});
282
+ const url = window.URL.createObjectURL(blob);
283
+ const a = document.createElement('a');
284
+ a.href = url;
285
+ a.download = filename;
286
+ document.body.appendChild(a);
287
+ a.click();
288
+ document.body.removeChild(a);
289
+ window.URL.revokeObjectURL(url);
290
+ }}
291
+ </script>
292
  """
293
 
294
  html = f"""
 
325
  </div>
326
  </div>
327
 
328
+ <!-- Thinking Section (Plain Text Only) -->
329
  {thinking_html}
330
 
331
  <!-- Answer Section -->
 
338
  </div>
339
  </div>
340
 
341
+ <!-- Code Blocks (From Final Answer Only) -->
342
  {code_html}
343
 
344
  </div>
 
346
  return html
347
 
348
  # Initialize model
349
+ print("🔄 Initializing VibeThinker with vLLM (T4-optimized, 40K context)...")
350
  vibe_model = VibeThinkerVLLM()
351
 
352
+ def generate_solution(prompt, temperature=0.6, max_tokens=16384, max_thinking_tokens=4096, progress=gr.Progress()):
353
  """Generate and format solution with progress tracking"""
354
  if not prompt.strip():
355
  return "<p style='color: #dc3545; font-size: 16px; padding: 20px;'>⚠️ Please enter a problem to solve.</p>"
356
 
 
357
  progress(0, desc="🔄 Initializing generation...")
358
 
 
359
  progress(0.3, desc="🧠 Model is thinking...")
360
  response, prompt_tokens, completion_tokens, gen_time = vibe_model.generate_response(
361
  prompt,
 
366
 
367
  progress(0.8, desc="📝 Formatting output...")
368
 
369
+ # Parse output - thinking stays as plain text, code only from answer
370
  thinking, answer, code_blocks = parse_model_output(response)
371
 
372
  # Format as HTML
 
393
 
394
  **Optimized for**: Competitive programming (LeetCode, Codeforces, AtCoder) and algorithm challenges
395
 
396
+ ⚡ **Powered by vLLM** (40K context) | 🎯 **Best for**: Python algorithmic problems with clear input/output specs
397
 
398
  ⚠️ **Note**: This model is specialized for competitive programming, not general software development
399
  """)
 
416
  )
417
  max_tokens_slider = gr.Slider(
418
  minimum=1024,
419
+ maximum=40960,
420
+ value=16384,
421
  step=1024,
422
+ label="📝 Max Total Tokens (40K max)"
423
  )
424
  max_thinking_slider = gr.Slider(
425
  minimum=512,
 
435
  - Higher thinking tokens (4096-8192) for complex problems requiring detailed reasoning
436
  - Temperature 0.6 balances creativity and accuracy
437
  - Loop detection is automatic - repetitive output will be truncated
438
+ - Code blocks shown are from final solution only (not from reasoning process)
439
  """)
440
 
441
  generate_btn = gr.Button("🚀 Generate Solution", variant="primary", size="lg")