VladBoyko committed
Commit 8a46019 · verified · 1 Parent(s): ebe1956

Update app.py

Updated Output handling

Files changed (1)
  1. app.py +280 -478
app.py CHANGED
@@ -1,76 +1,97 @@
  import gradio as gr
  import torch
  from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
- from threading import Thread
  import re
  import time
  
  class VibeThinkerModel:
      def __init__(self):
          self.model = None
          self.tokenizer = None
          self.device = "cuda" if torch.cuda.is_available() else "cpu"
-         self.load_model()
- 
      def load_model(self):
-         """Load VibeThinker model with transformers"""
          try:
-             print("🔄 Loading VibeThinker-1.5B with transformers...")
- 
-             self.tokenizer = AutoTokenizer.from_pretrained(
-                 "WeiboAI/VibeThinker-1.5B",
-                 trust_remote_code=True
-             )
- 
              self.model = AutoModelForCausalLM.from_pretrained(
-                 "WeiboAI/VibeThinker-1.5B",
-                 torch_dtype=torch.float16,
                  device_map="auto",
-                 trust_remote_code=True
              )
  
-             print(f"✅ Model loaded successfully on {self.device}")
-             print(f"💾 Model memory: ~{self.model.get_memory_footprint() / 1e9:.2f} GB")
  
-         except Exception as e:
-             print(f"❌ Error loading model: {e}")
-             raise
  
-     def generate_response_streaming(self, prompt, temperature=0.6, max_new_tokens=8192, max_thinking_tokens=4096):
-         """Generate response with streaming and real-time loop detection"""
-         if not self.model or not self.tokenizer:
-             yield "Model not loaded!", None, False
-             return
  
          try:
              start_time = time.time()
  
-             formatted_prompt = f"""<|im_start|>system
- You are a competitive programming expert. Provide clear, concise solutions to coding problems.
- 
- Format your response as:
- 1. Brief analysis (2-3 sentences)
- 2. Solution approach
- 3. Implementation code
- 4. Test cases
- 
- Keep reasoning under {max_thinking_tokens} tokens. Be direct and avoid repetition.<|im_end|>
- <|im_start|>user
- {prompt}<|im_end|>
- <|im_start|>assistant
- """
- 
-             inputs = self.tokenizer(formatted_prompt, return_tensors="pt").to(self.device)
-             prompt_length = inputs.input_ids.shape[1]
  
-             # Create streamer
-             streamer = TextIteratorStreamer(
-                 self.tokenizer,
-                 skip_prompt=True,
-                 skip_special_tokens=True
-             )
  
-             # Generation kwargs
              generation_kwargs = dict(
                  **inputs,
                  max_new_tokens=max_new_tokens,
@@ -78,494 +99,275 @@ Keep reasoning under {max_thinking_tokens} tokens. Be direct and avoid repetitio
                  top_p=0.95,
                  top_k=50,
                  do_sample=True,
-                 repetition_penalty=1.1,
                  pad_token_id=self.tokenizer.eos_token_id,
                  streamer=streamer,
              )
  
-             # Start generation in background thread
              thread = Thread(target=self.model.generate, kwargs=generation_kwargs)
              thread.start()
  
-             # Stream tokens with real-time loop detection
              generated_text = ""
-             last_loop_check = ""
              loop_detected = False
  
              for new_text in streamer:
-                 if loop_detected:
-                     break  # Stop streaming if loop detected
  
                  generated_text += new_text
-                 generation_time = time.time() - start_time
-                 tokens_generated = len(self.tokenizer.encode(generated_text))
  
-                 # Check for loops every ~50 tokens
-                 if len(generated_text) - len(last_loop_check) > 200:
-                     if self._detect_loop_realtime(generated_text):
                          loop_detected = True
-                         # Truncate at loop point
-                         generated_text = self._truncate_loop(generated_text)
-                     last_loop_check = generated_text
- 
-                 # Yield current state
                  yield generated_text, {
-                     "prompt_tokens": prompt_length,
-                     "tokens_generated": tokens_generated,
-                     "generation_time": generation_time,
-                     "is_complete": False
-                 }, loop_detected
  
-             # Wait for thread to complete
-             thread.join()
- 
-             # Final yield
-             final_time = time.time() - start_time
-             final_tokens = len(self.tokenizer.encode(generated_text))
  
              yield generated_text, {
-                 "prompt_tokens": prompt_length,
-                 "completion_tokens": final_tokens,
-                 "generation_time": final_time,
-                 "tokens_per_sec": final_tokens / final_time if final_time > 0 else 0,
-                 "is_complete": True
-             }, loop_detected
  
          except Exception as e:
-             yield f"Error during generation: {str(e)}", None, False
- 
-     def _detect_loop_realtime(self, text, check_window=200, min_repetitions=5):
-         """Quick loop detection for real-time streaming.
- 
-         Real loops repeat SHORT phrases (3-9 words) MANY times (5+ repetitions).
-         Example: "Wait, let me reconsider... Wait, let me reconsider... Wait, let me reconsider..."
- 
-         Args:
-             text: Full generated text
-             check_window: Number of recent words to check (default: 200)
-             min_repetitions: Minimum repetitions to consider a loop (default: 5)
- 
-         Returns:
-             bool: True if a loop is detected
-         """
-         words = text.split()
-         if len(words) < 30:
-             return False
- 
-         # Check last N words for repetitive patterns
-         recent_words = words[-check_window:] if len(words) > check_window else words
-         text_to_check = ' '.join(recent_words)
- 
-         # Look for short phrases (3-9 words) repeated multiple times
-         for phrase_len in range(3, 10):  # 3 to 9 words
-             if len(recent_words) < phrase_len * min_repetitions:
-                 continue
- 
-             # Check different starting positions
-             for i in range(len(recent_words) - phrase_len):
-                 phrase = ' '.join(recent_words[i:i+phrase_len])
- 
-                 # Count how many times this phrase appears
-                 count = text_to_check.count(phrase)
- 
-                 # If phrase appears 5+ times, it's a loop
-                 if count >= min_repetitions:
-                     return True
- 
-         return False
- 
-     def _truncate_loop(self, text, min_repetitions=5):
-         """Truncate text at the point where loop starts.
- 
-         Find where a 3-9 word phrase starts repeating 5+ times and truncate there.
-         """
-         words = text.split()
- 
-         # Check for short phrases (3-9 words) repeated multiple times
-         for phrase_len in range(3, 10):
-             if len(words) < phrase_len * min_repetitions:
-                 continue
- 
-             # Scan through text to find loop start point
-             for i in range(len(words) - phrase_len * min_repetitions):
-                 phrase = ' '.join(words[i:i+phrase_len])
- 
-                 # Count consecutive repetitions starting from position i
-                 repetition_count = 0
-                 check_pos = i
- 
-                 while check_pos + phrase_len <= len(words):
-                     check_phrase = ' '.join(words[check_pos:check_pos+phrase_len])
-                     if check_phrase == phrase:
-                         repetition_count += 1
-                         check_pos += phrase_len
-                     else:
-                         break
- 
-                 # If we found 5+ consecutive repetitions, truncate at loop start
-                 if repetition_count >= min_repetitions:
-                     return ' '.join(words[:i])
- 
-         # If no clear loop found, return original
-         return text
  
- # Initialize model
- print("🔄 Initializing VibeThinker-1.5B...")
  vibe_model = VibeThinkerModel()
  
- class IntelligentStreamParser:
-     """Parse streaming output in real-time into sections"""
- 
-     def __init__(self):
-         self.reset()
- 
-     def reset(self):
-         self.thinking = ""
-         self.explanation = ""
-         self.code_blocks = []
-         self.current_code_lang = None
-         self.current_code_content = ""
-         self.in_code_block = False
-         self.phase = "thinking"  # thinking -> explanation -> code
  
-     def parse_chunk(self, full_text):
-         """Parse text in real-time as it streams"""
  
-         # Detect code blocks with regex
-         code_pattern = r'```(\w+)?\n(.*?)```'
-         found_codes = re.findall(code_pattern, full_text, re.DOTALL)
  
-         # Remove code blocks from text for section detection
-         text_without_code = re.sub(code_pattern, '###CODE_PLACEHOLDER###', full_text, flags=re.DOTALL)
  
-         # Try to split thinking and explanation
-         thinking_content = ""
-         explanation_content = text_without_code
  
-         # Check for explicit markers
-         if "Solution:" in text_without_code or "Explanation:" in text_without_code:
-             parts = re.split(r'(?:Solution|Explanation):', text_without_code, maxsplit=1)
-             if len(parts) == 2:
-                 thinking_content = parts[0].strip()
-                 explanation_content = parts[1].strip()
-         elif "```" in text_without_code:
-             # Split at first code block
-             parts = text_without_code.split("###CODE_PLACEHOLDER###", maxsplit=1)
-             if len(parts) == 2 and len(parts[0]) > 100:
-                 # Check if first part looks like thinking
-                 first_part_lower = parts[0].lower()
-                 thinking_keywords = ['approach', 'idea', 'step', 'first', "let's", 'plan', 'strategy']
-                 if any(kw in first_part_lower for kw in thinking_keywords):
-                     thinking_content = parts[0].strip()
-                     explanation_content = parts[1].strip()
  
-         # Clean up placeholders
-         explanation_content = explanation_content.replace('###CODE_PLACEHOLDER###', '').strip()
  
-         return {
-             'thinking': thinking_content,
-             'explanation': explanation_content,
-             'code_blocks': found_codes
-         }
- 
- parser = IntelligentStreamParser()
- 
- def format_streaming_html(generated_text, stats, loop_detected, is_generating=True):
-     """Format streaming output with intelligent parsing"""
- 
-     # Parse the current text
-     parsed = parser.parse_chunk(generated_text)
- 
-     thinking = parsed['thinking']
-     explanation = parsed['explanation']
-     code_blocks = parsed['code_blocks']
- 
-     # Stats
-     if stats:
-         prompt_tokens = stats.get('prompt_tokens', 0)
-         tokens_generated = stats.get('tokens_generated', 0) or stats.get('completion_tokens', 0)
-         generation_time = stats.get('generation_time', 0)
-         tokens_per_sec = stats.get('tokens_per_sec', 0) or (tokens_generated / generation_time if generation_time > 0 else 0)
-         is_complete = stats.get('is_complete', False)
-     else:
-         prompt_tokens = tokens_generated = generation_time = tokens_per_sec = 0
-         is_complete = False
- 
-     thinking_tokens_est = len(thinking.split()) * 1.3 if thinking else 0
-     total_tokens = prompt_tokens + tokens_generated
- 
-     # Card style
-     card_base_style = "background: #ffffff; border-radius: 12px; padding: 24px; margin-bottom: 20px; box-shadow: 0 2px 8px rgba(0,0,0,0.08);"
- 
-     # Blink cursor CSS
-     cursor_style = """
-     <style>
-     @keyframes blink {
-         0%, 49% { opacity: 1; }
-         50%, 100% { opacity: 0; }
-     }
-     .cursor {
-         display: inline-block;
-         width: 2px;
-         height: 1em;
-         background: #667eea;
-         margin-left: 2px;
-         animation: blink 0.7s infinite;
-     }
-     </style>
-     """
- 
-     # Status message
-     status_emoji = "✅" if is_complete else "🔄"
-     status_text = "Complete" if is_complete else "Generating..."
- 
-     # Stats card
-     html = f"""
-     {cursor_style}
-     <div style="font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif; max-width: 100%; margin: 0 auto; background: #f8f9fa; padding: 20px; border-radius: 12px;">
  
-     <!-- Stats Card -->
-     <div style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); padding: 24px; border-radius: 12px; margin-bottom: 20px; color: white; box-shadow: 0 4px 12px rgba(102,126,234,0.3);">
-         <h3 style="margin: 0 0 16px 0; font-size: 17px; font-weight: 600; opacity: 0.95;">{status_emoji} {status_text}</h3>
-         <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(130px, 1fr)); gap: 12px; font-size: 13px;">
-             <div style="background: rgba(255,255,255,0.15); padding: 14px; border-radius: 8px; backdrop-filter: blur(10px);">
-                 <div style="opacity: 0.85; font-size: 11px; margin-bottom: 6px; text-transform: uppercase; letter-spacing: 0.5px;">Time</div>
-                 <div style="font-size: 22px; font-weight: 700;">{generation_time:.1f}s</div>
-             </div>
-             <div style="background: rgba(255,255,255,0.15); padding: 14px; border-radius: 8px; backdrop-filter: blur(10px);">
-                 <div style="opacity: 0.85; font-size: 11px; margin-bottom: 6px; text-transform: uppercase; letter-spacing: 0.5px;">Speed</div>
-                 <div style="font-size: 22px; font-weight: 700;">{tokens_per_sec:.1f} t/s</div>
              </div>
-             <div style="background: rgba(255,255,255,0.15); padding: 14px; border-radius: 8px; backdrop-filter: blur(10px);">
-                 <div style="opacity: 0.85; font-size: 11px; margin-bottom: 6px; text-transform: uppercase; letter-spacing: 0.5px;">Tokens</div>
-                 <div style="font-size: 22px; font-weight: 700;">{tokens_generated:,}</div>
-             </div>
-         </div>
-     </div>
-     """
- 
-     # Loop warning (if detected)
-     if loop_detected:
-         html += f"""
-         <div style="{card_base_style} border-left: 4px solid #ffc107; background: #fff3cd;">
-             <div style="color: #856404; font-weight: 600; display: flex; align-items: center; gap: 8px;">
-                 <span style="font-size: 20px;">⚠️</span>
-                 <span>Loop Detected - Generation stopped to prevent repetition</span>
-             </div>
-         </div>
-         """
- 
-     # Thinking section (collapsed if exists)
-     if thinking and len(thinking.strip()) > 0:
-         thinking_escaped = thinking.replace('<', '&lt;').replace('>', '&gt;')
-         cursor_html = '<span class="cursor"></span>' if is_generating and not explanation and not code_blocks else ''
-         html += f"""
-         <details style="{card_base_style} border-left: 4px solid #8b5cf6;">
-             <summary style="cursor: pointer; font-weight: 600; font-size: 16px; color: #7c3aed; user-select: none; display: flex; align-items: center; gap: 10px; padding: 4px 0;">
-                 <span style="font-size: 20px;">🧠</span>
-                 <span>Reasoning Process</span>
-                 <span style="margin-left: auto; font-size: 13px; color: #8b5cf6; font-weight: normal;">~{int(thinking_tokens_est):,} tokens • Click to expand ▼</span>
-             </summary>
-             <div style="margin-top: 20px; padding-top: 20px; border-top: 2px solid #e9ecef; color: #495057; line-height: 1.8; white-space: pre-wrap; font-size: 14px; font-family: 'SF Mono', Monaco, 'Courier New', monospace;">
-                 {thinking_escaped}{cursor_html}
-             </div>
-         </details>
-         """
- 
-     # Explanation section
-     if explanation and len(explanation.strip()) > 10:
-         explanation_escaped = explanation.replace('<', '&lt;').replace('>', '&gt;')
-         cursor_html = '<span class="cursor"></span>' if is_generating and not code_blocks else ''
-         html += f"""
-         <div style="{card_base_style} border-left: 4px solid #10b981;">
-             <h3 style="margin: 0 0 16px 0; color: #10b981; font-size: 18px; font-weight: 600; display: flex; align-items: center; gap: 10px;">
-                 <span style="font-size: 22px;">💡</span>
-                 <span>Solution Explanation</span>
-             </h3>
-             <div style="color: #495057; line-height: 1.8; font-size: 15px; white-space: pre-wrap;">
-                 {explanation_escaped}{cursor_html}
              </div>
-         </div>
      """
- 
-     # Code blocks
-     if code_blocks and len(code_blocks) > 0:
-         code_blocks_html = ""
-         for idx, (lang, code) in enumerate(code_blocks):
-             lang_display = lang if lang else "code"
-             code_id = f"code_{idx}_{int(time.time()*1000)}"
-             code_clean = code.strip()
- 
-             # Add cursor to last code block if generating
-             cursor_html = '<span class="cursor"></span>' if is_generating and idx == len(code_blocks) - 1 else ''
- 
-             code_blocks_html += f"""
-             <div style="margin-bottom: 16px; background: #1e1e1e; border-radius: 10px; overflow: hidden; box-shadow: 0 2px 8px rgba(0,0,0,0.15);">
-                 <div style="background: #2d2d2d; padding: 12px 20px; color: #e0e0e0; font-weight: 600; font-size: 13px; display: flex; justify-content: space-between; align-items: center;">
-                     <span style="display: flex; align-items: center; gap: 8px;">
-                         <span style="font-size: 16px;">💻</span>
-                         <span>{lang_display}</span>
-                     </span>
-                     <div style="display: flex; gap: 8px;">
-                         <button onclick="navigator.clipboard.writeText(document.getElementById('{code_id}').textContent.replace('▌', '')); this.textContent='✓ Copied'; setTimeout(() => this.textContent='📋 Copy', 2000)"
-                                 style="background: #28a745; color: white; border: none; padding: 7px 16px; border-radius: 6px; cursor: pointer; font-size: 12px; font-weight: 500; transition: all 0.2s;"
-                                 onmouseover="if(this.textContent==='📋 Copy') this.style.background='#218838'"
-                                 onmouseout="if(this.textContent==='📋 Copy') this.style.background='#28a745'">
-                             📋 Copy
-                         </button>
-                         <button onclick="downloadCode(document.getElementById('{code_id}').textContent.replace('▌', ''), '{lang_display}')"
-                                 style="background: #007bff; color: white; border: none; padding: 7px 16px; border-radius: 6px; cursor: pointer; font-size: 12px; font-weight: 500; transition: all 0.2s;"
-                                 onmouseover="this.style.background='#0056b3'"
-                                 onmouseout="this.style.background='#007bff'">
-                             💾 Download
-                         </button>
-                     </div>
                  </div>
-                 <pre style="margin: 0; padding: 20px; color: #d4d4d4; overflow-x: auto; font-family: 'SF Mono', Monaco, 'Courier New', monospace; font-size: 14px; line-height: 1.6; background: #1e1e1e;"><code id="{code_id}">{code_clean}{cursor_html}</code></pre>
-             </div>
          """
- 
-         html += f"""
-         <div style="{card_base_style} border-left: 4px solid #6b7280;">
-             <h3 style="margin: 0 0 20px 0; color: #6b7280; font-size: 18px; font-weight: 600; display: flex; align-items: center; gap: 10px;">
-                 <span style="font-size: 22px;">💻</span>
-                 <span>Implementation</span>
-             </h3>
-             {code_blocks_html}
-         </div>
- 
-         <script>
-         function downloadCode(code, lang) {{
-             const extensions = {{
-                 'python': 'py', 'javascript': 'js', 'java': 'java',
-                 'cpp': 'cpp', 'c': 'c', 'html': 'html', 'css': 'css',
-                 'typescript': 'ts', 'rust': 'rs', 'go': 'go',
-             }};
-             const ext = extensions[lang.toLowerCase()] || 'txt';
-             const filename = `solution.${{ext}}`;
  
-             const blob = new Blob([code], {{ type: 'text/plain' }});
-             const url = window.URL.createObjectURL(blob);
-             const a = document.createElement('a');
-             a.href = url;
-             a.download = filename;
-             document.body.appendChild(a);
-             a.click();
-             document.body.removeChild(a);
-             window.URL.revokeObjectURL(url);
-         }}
-         </script>
      """
- 
-     html += "</div>"
-     return html
  
- def generate_solution_streaming(prompt, temperature=0.6, max_tokens=8192, max_thinking_tokens=4096):
-     """Generate solution with streaming"""
-     if not prompt.strip():
-         yield "<p style='color: #dc3545; font-size: 16px; padding: 20px;'>⚠️ Please enter a problem to solve.</p>"
-         return
- 
-     # Reset parser
-     parser.reset()
  
-     # Stream generation
-     for generated_text, stats, loop_detected in vibe_model.generate_response_streaming(
-         prompt,
-         temperature=temperature,
-         max_new_tokens=max_tokens,
-         max_thinking_tokens=max_thinking_tokens
-     ):
          if stats:
-             is_generating = not stats.get('is_complete', False)
-             html_output = format_streaming_html(generated_text, stats, loop_detected, is_generating)
-             yield html_output
          else:
-             yield f"<p style='color: #dc3545;'>Error: {generated_text}</p>"
  
- # Create Gradio interface
  with gr.Blocks(
-     theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="purple"),
-     css=".gradio-container { max-width: 1400px !important; }"
  ) as demo:
-     gr.Markdown("""
-     # 🧠 VibeThinker-1.5B Competitive Coding Assistant
- 
-     **Optimized for**: Competitive programming (LeetCode, Codeforces, AtCoder) and algorithm challenges
- 
-     🎯 **Best for**: Python algorithmic problems with clear input/output specifications
- 
-     ⚠️ **Note**: This model is specialized for competitive programming, not general software development
  
-     ✨ **Features**: Real-time streaming with intelligent section parsing and automatic loop detection
      """)
  
      with gr.Row():
-         with gr.Column(scale=1):
-             prompt_input = gr.Textbox(
-                 label="💭 Your Coding Problem",
-                 placeholder="Example: Write a Python function to find the longest palindromic substring in a given string. Include test cases.",
-                 lines=8
              )
  
-             with gr.Accordion("⚙️ Advanced Settings", open=False):
-                 temperature_slider = gr.Slider(
-                     minimum=0.0, maximum=1.0, value=0.6, step=0.1,
-                     label="🌡️ Temperature (0.6 recommended)"
-                 )
-                 max_tokens_slider = gr.Slider(
-                     minimum=1024, maximum=16384, value=8192, step=1024,
-                     label="📝 Max New Tokens"
-                 )
-                 max_thinking_slider = gr.Slider(
-                     minimum=512, maximum=8192, value=3072, step=512,
-                     label="🧠 Max Thinking Tokens (hint for prompt)"
-                 )
- 
-                 gr.Markdown("""
-                 **Tips:**
-                 - Lower thinking tokens (1024-2048) for faster, direct solutions
-                 - Higher thinking tokens (4096-8192) for complex reasoning
-                 - Temperature 0.6 balances creativity and accuracy
- 
-                 **Real-time Features:**
-                 - 🔄 Live token-by-token streaming
-                 - 🧠 Intelligent section parsing (thinking/explanation/code)
-                 - ⚠️ Automatic loop detection (stops if repetitive patterns detected)
-                 - ⚡ Blinking cursors on actively streaming sections
-                 - 📊 Live statistics (time, speed, tokens)
- 
-                 **Loop Detection:**
-                 - Monitors for 3-9 word phrases repeated 5+ times
-                 - Automatically stops generation to save tokens
-                 - Truncates at loop start position
-                 """)
  
-             generate_btn = gr.Button("🚀 Generate Solution", variant="primary", size="lg")
-             clear_btn = gr.Button("🗑️ Clear", size="sm")
- 
          with gr.Column(scale=2):
-             output_html = gr.HTML(label="Solution")
- 
-     generate_btn.click(
-         fn=generate_solution_streaming,
-         inputs=[prompt_input, temperature_slider, max_tokens_slider, max_thinking_slider],
-         outputs=output_html
-     )
- 
-     clear_btn.click(
-         fn=lambda: ("", ""),
-         outputs=[prompt_input, output_html]
-     )
  
      gr.Examples(
          examples=[
-             ["Write a Python function to find the maximum sum of a contiguous subarray (Kadane's Algorithm). Include edge cases and test with array [-2,1,-3,4,-1,2,1,-5,4]"],
-             ["Implement a function to detect if a linked list has a cycle. Explain your approach and provide the solution."],
-             ["Given an array of integers and a target sum, find two numbers that add up to the target. Optimize for time complexity."],
-             ["Create a single page HTML application that lets the user choose a color and generates a matching color palette."],
          ],
-         inputs=prompt_input
      )
  
  if __name__ == "__main__":
 
  import gradio as gr
  import torch
  from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
+ from threading import Thread, Event
  import re
  import time
+ import html
+ 
+ # --- Configuration ---
+ MODEL_ID = "WeiboAI/VibeThinker-1.5B"
  
  class VibeThinkerModel:
      def __init__(self):
          self.model = None
          self.tokenizer = None
          self.device = "cuda" if torch.cuda.is_available() else "cpu"
+         self.stop_signal = Event()
+ 
      def load_model(self):
+         if self.model is not None: return
+         print(f"🔄 Loading {MODEL_ID}...")
          try:
+             self.tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
              self.model = AutoModelForCausalLM.from_pretrained(
+                 MODEL_ID,
+                 torch_dtype=torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16,
                  device_map="auto",
+                 trust_remote_code=True,
+                 low_cpu_mem_usage=True
              )
+             print("✅ Model loaded.")
+         except Exception as e:
+             raise e
+ 
+     def stop_generation(self):
+         self.stop_signal.set()
+ 
+     def _detect_tail_loop(self, text, min_phrase_len=3, max_phrase_len=10, threshold=20):
+         """
+         Detects if the generator has gotten stuck in a loop at the END of the text.
+         Criteria: a phrase of 3-10 words repeated at least 20 times consecutively.
+         """
+         words = text.split()
+         total_words = len(words)
+ 
+         # We need at least (min_phrase * threshold) words to even check
+         if total_words < min_phrase_len * threshold:
+             return False
+ 
+         # Only check the end of the string (optimization):
+         # we look at the last (max_phrase * threshold) words
+         check_window = max_phrase_len * threshold
+         recent_words = words[-check_window:] if total_words > check_window else words
+ 
+         for phrase_len in range(min_phrase_len, max_phrase_len + 1):
+             # The candidate phrase is the very last 'phrase_len' words
+             candidate_phrase = recent_words[-phrase_len:]
  
+             # Construct what the tail SHOULD look like if it's looping,
+             # e.g. if the phrase is "and then", we expect "and then and then..."
+             # We check if the tail of the text matches (phrase * threshold)
  
+             required_len = phrase_len * threshold
+             if len(recent_words) < required_len:
+                 continue
+ 
+             segment_to_check = recent_words[-required_len:]
+ 
+             # Efficient check: does the segment consist ONLY of the candidate phrase?
+             # We compare the segment against the candidate phrase repeated
+             expected_segment = candidate_phrase * threshold
+ 
+             if segment_to_check == expected_segment:
+                 return True
+ 
+         return False
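In effect, the check tiles the last `phrase_len` words `threshold` times and compares that against the tail of the word list. A standalone sketch of the same logic for quick verification (names here are illustrative, not from the commit):

    def has_tail_loop(text, min_len=3, max_len=10, threshold=20):
        # True when some 3-10 word phrase tiles the tail `threshold` times
        words = text.split()
        for n in range(min_len, max_len + 1):
            need = n * threshold
            if len(words) >= need and words[-need:] == words[-n:] * threshold:
                return True
        return False

    assert has_tail_loop("Plan: " + "Wait, let me reconsider. " * 25)
    assert not has_tail_loop("def solve(nums): return max(nums)")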
 
+     def generate_response_streaming(self, prompt, temperature=0.6, max_new_tokens=32000):
+         if not self.model: self.load_model()
+         self.stop_signal.clear()
  
          try:
              start_time = time.time()
  
+             # Optimized Prompt for VibeThinker
+             messages = [
+                 {"role": "system", "content": "You are an expert algorithm engineer. Analyze the problem deeply, then provide a clean Python solution."},
+                 {"role": "user", "content": prompt}
+             ]
+             text_input = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+             inputs = self.tokenizer(text_input, return_tensors="pt").to(self.device)
  
+             streamer = TextIteratorStreamer(self.tokenizer, skip_prompt=True, skip_special_tokens=True)
  
              generation_kwargs = dict(
                  **inputs,
                  max_new_tokens=max_new_tokens,
  
                  top_p=0.95,
                  top_k=50,
                  do_sample=True,
                  pad_token_id=self.tokenizer.eos_token_id,
                  streamer=streamer,
              )
  
              thread = Thread(target=self.model.generate, kwargs=generation_kwargs)
              thread.start()
  
              generated_text = ""
              loop_detected = False
  
+             # Token counter for loop check frequency
+             token_count = 0
+ 
              for new_text in streamer:
+                 if self.stop_signal.is_set(): break
  
                  generated_text += new_text
+                 token_count += 1
  
+                 # Check for loops every 10 tokens to save CPU
+                 if token_count % 10 == 0:
+                     if self._detect_tail_loop(generated_text):
                          loop_detected = True
+                         self.stop_signal.set()  # Signal stop (ends streaming; generate() itself keeps running in the background thread)
+                         # Optional: truncate the repetitive garbage
+                         # (simple truncation for UI cleanliness)
+                         generated_text = generated_text + "\n\n[⚠️ Generation stopped: Infinite loop detected]"
+                         break
+ 
                  yield generated_text, {
+                     "time": time.time() - start_time,
+                     "tokens": len(self.tokenizer.encode(generated_text)),
+                     "generating": True
+                 }
  
+             if not self.stop_signal.is_set():
+                 thread.join()
  
              yield generated_text, {
+                 "time": time.time() - start_time,
+                 "tokens": len(self.tokenizer.encode(generated_text)),
+                 "generating": False
+             }
  
          except Exception as e:
+             yield f"Error: {str(e)}", None
  
  vibe_model = VibeThinkerModel()
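The hand-built `<|im_start|>` prompt from the old version is now delegated to the tokenizer's chat template. For a ChatML-style template like the one the old code mimicked, the rendered string comes out roughly as below (exact special tokens depend on the model's bundled template; the user message is invented for illustration):

    messages = [
        {"role": "system", "content": "You are an expert algorithm engineer..."},
        {"role": "user", "content": "Find two numbers that sum to a target."},
    ]
    # tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    # ≈ "<|im_start|>system\nYou are an expert algorithm engineer...<|im_end|>\n"
    #   "<|im_start|>user\nFind two numbers that sum to a target.<|im_end|>\n"
    #   "<|im_start|>assistant\n"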
  
+ class ModernUIParser:
+     """Parses text into a structured, modern UI"""
  
+     def format_code(self, code, lang="python"):
+         """Applies basic HTML syntax highlighting regex"""
+         code = html.escape(code)
+         # Comments
+         code = re.sub(r'(#.*?)(?=\n|$)', r'<span class="c">\1</span>', code)
+         # Keywords
+         keywords = r'\b(def|class|return|if|else|elif|for|while|import|from|try|except|with|as|pass|None|True|False)\b'
+         code = re.sub(keywords, r'<span class="k">\1</span>', code)
+         # Builtins/Calls
+         code = re.sub(r'\b(print|len|range|enumerate|zip|super|__init__)\b', r'<span class="nf">\1</span>', code)
+         # Strings
+         code = re.sub(r'(&quot;.*?&quot;)', r'<span class="s">\1</span>', code)
+         code = re.sub(r"('.*?')", r'<span class="s">\1</span>', code)
+         return code
+ 
+     def parse_and_render(self, text, stats):
+         # 1. Separate Thinking from Content
+         # Heuristic: content before the first code block or explicit "Solution" header is usually thinking
+         thinking = ""
+         solution = text
  
+         # Find split point
+         markers = ["```", "Here is the solution", "### Solution", "Implementation:"]
+         first_marker_idx = len(text)
+         for m in markers:
+             idx = text.find(m)
+             if idx != -1 and idx < first_marker_idx:
+                 first_marker_idx = idx
  
+         if first_marker_idx < len(text) and first_marker_idx > 50:
+             thinking = text[:first_marker_idx].strip()
+             solution = text[first_marker_idx:].strip()
  
+         # 2. Process Solution Text (Markdown-ish to HTML)
+         # Handle code blocks
+         parts = re.split(r'(```\w*\n.*?```)', solution, flags=re.DOTALL)
+         solution_html = ""
  
+         for part in parts:
+             if part.startswith('```'):
+                 # Extract lang and code
+                 match = re.match(r'```(\w*)\n(.*?)```', part, re.DOTALL)
+                 if match:
+                     lang = match.group(1) or "text"
+                     code_content = match.group(2)
+                     highlighted = self.format_code(code_content, lang)
+                     solution_html += f"""
+                     <div class="code-block">
+                         <div class="code-header">
+                             <span class="lang-tag">{lang}</span>
+                             <span class="copy-btn" onclick="navigator.clipboard.writeText(this.parentElement.nextElementSibling.innerText)">Copy</span>
+                         </div>
+                         <pre>{highlighted}</pre>
+                     </div>"""
+                 else:
+                     solution_html += f"<pre>{html.escape(part)}</pre>"
+             else:
+                 # Normal text processing
+                 clean_text = html.escape(part)
+                 # Headers
+                 clean_text = re.sub(r'^### (.*?)$', r'<h3>\1</h3>', clean_text, flags=re.M)
+                 clean_text = re.sub(r'^## (.*?)$', r'<h2>\1</h2>', clean_text, flags=re.M)
+                 clean_text = re.sub(r'\*\*(.*?)\*\*', r'<strong>\1</strong>', clean_text)
+                 # Line breaks
+                 clean_text = clean_text.replace('\n', '<br>')
+                 solution_html += f"<div class='text-content'>{clean_text}</div>"
+ 
+         # 3. Process Thinking
+         thinking_html = html.escape(thinking).replace('\n', '<br>')
  
+         # 4. Stats & Cursor
+         is_gen = stats['generating'] if stats else False
+         t_sec = stats['tokens'] / stats['time'] if stats and stats['time'] > 0 else 0
+         cursor = '<span class="cursor"></span>' if is_gen else ''
  
+         # CSS Styles (Modern Dark Theme)
+         css = """
+         <style>
+             :root { --bg: #0f1117; --card: #1e293b; --accent: #6366f1; --text: #e2e8f0; --dim: #94a3b8; }
+             .ui-container { font-family: 'Inter', system-ui, sans-serif; color: var(--text); line-height: 1.6; }
+ 
+             /* Stats Bar */
+             .stats-bar { display: flex; gap: 15px; margin-bottom: 20px; font-size: 12px; text-transform: uppercase; letter-spacing: 1px; }
+             .stat-pill { background: #334155; padding: 4px 10px; border-radius: 20px; color: #cbd5e1; display: flex; align-items: center; gap: 6px; }
+             .stat-active { border: 1px solid var(--accent); color: var(--accent); background: rgba(99, 102, 241, 0.1); }
+ 
+             /* Thinking Section */
+             details.thinking-box { margin-bottom: 20px; border: 1px solid #312e81; border-radius: 8px; background: rgba(49, 46, 129, 0.1); overflow: hidden; }
+             details.thinking-box summary { padding: 12px 16px; cursor: pointer; font-weight: 600; color: #818cf8; list-style: none; outline: none; user-select: none; }
+             details.thinking-box summary::marker { display: none; }
+             details.thinking-box summary:hover { background: rgba(49, 46, 129, 0.2); }
+             .thought-content { padding: 16px; font-family: 'JetBrains Mono', monospace; font-size: 13px; color: #a5b4fc; border-top: 1px solid #312e81; }
+ 
+             /* Solution Section */
+             .solution-box { background: var(--bg); padding: 10px 0; }
+             .text-content { margin-bottom: 10px; }
+             h2, h3 { color: white; margin-top: 20px; margin-bottom: 10px; font-weight: 600; }
+             strong { color: #fff; font-weight: 700; }
+ 
+             /* Code Blocks */
+             .code-block { background: #0d1117; border: 1px solid #30363d; border-radius: 8px; margin: 15px 0; overflow: hidden; }
+             .code-header { background: #161b22; padding: 6px 12px; display: flex; justify-content: space-between; align-items: center; border-bottom: 1px solid #30363d; }
+             .lang-tag { font-size: 11px; color: #8b949e; text-transform: uppercase; font-weight: bold; }
+             .copy-btn { font-size: 11px; cursor: pointer; color: #58a6ff; }
+             .copy-btn:hover { text-decoration: underline; }
+             pre { margin: 0; padding: 16px; overflow-x: auto; font-family: 'Fira Code', 'Consolas', monospace; font-size: 14px; color: #c9d1d9; }
+ 
+             /* Syntax Highlighting Colors */
+             .k { color: #ff7b72; }  /* Keyword */
+             .s { color: #a5d6ff; }  /* String */
+             .c { color: #8b949e; font-style: italic; }  /* Comment */
+             .nf { color: #d2a8ff; }  /* Function */
+ 
+             /* Cursor Animation */
+             .cursor { display: inline-block; width: 8px; height: 18px; background: var(--accent); vertical-align: text-bottom; animation: blink 1s step-end infinite; margin-left: 2px; }
+             @keyframes blink { 0%, 100% { opacity: 1; } 50% { opacity: 0; } }
+         </style>
+         """
  
+         html_out = f"""{css}
+         <div class="ui-container">
+             <div class="stats-bar">
+                 <div class="stat-pill {'stat-active' if is_gen else ''}">
+                     {'🟢 GENERATING' if is_gen else '⚪ COMPLETE'}
                  </div>
+                 <div class="stat-pill">⏱️ {stats['time']:.1f}s</div>
+                 <div class="stat-pill">⚡ {t_sec:.1f} T/s</div>
+                 <div class="stat-pill">📝 {stats['tokens']} Tok</div>
              </div>
      """
+ 
+         if thinking:
+             # Open by default if generating, closed if done
+             is_open = "open" if is_gen else ""
+             html_out += f"""
+             <details class="thinking-box" {is_open}>
+                 <summary>🧠 Chain of Thought (Process)</summary>
+                 <div class="thought-content">
+                     {thinking_html} {cursor if not solution else ''}
                  </div>
+             </details>
      """
  
+         html_out += f"""
+         <div class="solution-box">
+             {solution_html} {cursor if solution or not thinking else ''}
+         </div>
+         </div>
      """
+ 
+         return html_out
  
+ parser = ModernUIParser()
+ 
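Because the splitting pattern above is wrapped in a capture group, `re.split` keeps the fenced blocks in the result, so prose and code alternate in `parts`. A quick illustration with invented sample text:

    import re

    sample = "Use two pointers.\n```python\nprint('hi')\n```\nDone."
    parts = re.split(r'(```\w*\n.*?```)', sample, flags=re.DOTALL)
    # parts == ['Use two pointers.\n', "```python\nprint('hi')\n```", '\nDone.']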
+ def run_gen(prompt, temp, max_tokens):
+     if not prompt:
+         # yield rather than return: run_gen is a generator, so a returned
+         # string would never reach the UI
+         yield "Please enter a prompt."
+         return
  
+     gen = vibe_model.generate_response_streaming(prompt, temp, max_tokens)
+     for text, stats in gen:
          if stats:
+             yield parser.parse_and_render(text, stats)
          else:
+             yield f"<div style='color:red'>Error: {text}</div>"
+ 
+ def stop_action():
+     vibe_model.stop_generation()
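One caveat: `stop_signal` only breaks the streaming loops, so the background `model.generate()` thread still runs to completion after a stop. If hard cancellation were wanted, the same Event could be handed to transformers' stopping-criteria hook; a sketch of that option (not something this commit adds):

    from transformers import StoppingCriteria, StoppingCriteriaList

    class StopOnEvent(StoppingCriteria):
        """Makes generate() itself poll the shared Event each step (illustrative helper)."""
        def __init__(self, event):
            self.event = event

        def __call__(self, input_ids, scores, **kwargs):
            return self.event.is_set()

    # e.g. inside generate_response_streaming:
    # generation_kwargs["stopping_criteria"] = StoppingCriteriaList([StopOnEvent(self.stop_signal)])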
  
+ # --- GRADIO INTERFACE ---
  with gr.Blocks(
+     title="VibeThinker IDE",
+     theme=gr.themes.Base(
+         primary_hue="indigo",
+         neutral_hue="slate",
+         font=("Inter", "sans-serif")
+     ),
+     css=".gradio-container { background-color: #0f1117 !important; border: none; }"
  ) as demo:
  
+     gr.Markdown("""
+     <div style="text-align: center; margin-bottom: 20px;">
+         <h1 style="color: white; font-size: 2rem;">⚡ VibeThinker IDE</h1>
+         <p style="color: #94a3b8;">Specialized 1.5B Model for Algorithms & Competitive Coding</p>
+     </div>
      """)
  
      with gr.Row():
+         # Left Column: Inputs
+         with gr.Column(scale=1, variant="panel"):
+             input_text = gr.Textbox(
+                 label="Problem Statement",
+                 lines=8,
+                 placeholder="Paste a LeetCode problem or ask for a specific algorithm...",
+                 elem_id="input-box"
              )
  
+             with gr.Accordion("Settings", open=False):
+                 temp = gr.Slider(0.1, 1.0, value=0.6, label="Temperature")
+                 tokens = gr.Slider(1024, 32000, value=8192, label="Max Tokens")
  
+             with gr.Row():
+                 btn_run = gr.Button(" Run", variant="primary", scale=2)
+                 btn_stop = gr.Button("⏹ Stop", variant="stop", scale=1)
+ 
+         # Right Column: Output
          with gr.Column(scale=2):
+             out_html = gr.HTML(label="Result Console")
  
+     btn_run.click(run_gen, inputs=[input_text, temp, tokens], outputs=out_html)
+     btn_stop.click(stop_action, None, None)
+ 
      gr.Examples(
          examples=[
+             ["Determine if a Sudoku board is valid. Provide a Python solution with O(1) space complexity if possible."],
+             ["Explain the Knuth-Morris-Pratt (KMP) algorithm and implement it in Python."],
+             ["Solve the 'Trapping Rain Water' problem using the two-pointer approach."],
          ],
+         inputs=input_text
      )
  
  if __name__ == "__main__":