Spaces:

WellGoods
/

VibeThinker

Sleeping

App Files Community

VladBoyko committed 23 days ago

Commit

a271ec8

verified ·

1 Parent(s): 6368a59

Update app.py

Browse files

Files changed (1) hide show

app.py +100 -47

app.py CHANGED Viewed

@@ -20,19 +20,19 @@ class VibeThinkerVLLM:
                 model="WeiboAI/VibeThinker-1.5B",
                 dtype="float16",  # Use float16 instead of bfloat16 for T4
                 gpu_memory_utilization=0.85,
-                max_model_len=16384,  # Reduced from 40960 for T4 stability
                 enforce_eager=True,  # Disable CUDA graphs for T4
                 disable_custom_all_reduce=True,  # Avoid custom kernels
                 enable_prefix_caching=False,  # Disable for stability
                 max_num_seqs=1,  # Process one sequence at a time
                 trust_remote_code=True
             )
-            print("✅ vLLM model loaded successfully with T4-compatible settings")
         except Exception as e:
             print(f"❌ Error loading model: {e}")
             raise
-    def generate_response(self, prompt, temperature=0.6, max_tokens=8192, max_thinking_tokens=4096):
         """
         Generate response with thinking length control and loop detection
@@ -106,7 +106,6 @@ Keep reasoning under {max_thinking_tokens} tokens. DO NOT repeat yourself.<|im_e
     def _detect_loop(self, text):
         """Detect if text contains repetitive loops"""
-        # Check for repeated phrases
         words = text.split()
         if len(words) < 20:
             return False
@@ -129,71 +128,84 @@ Keep reasoning under {max_thinking_tokens} tokens. DO NOT repeat yourself.<|im_e
                 rest_start = i + length
                 rest = ' '.join(words[rest_start:])
                 if phrase in rest:
-                    # Truncate at first repetition
                     return ' '.join(words[:rest_start])
         return text
 def parse_model_output(text):
     """
     Parse model output to separate thinking and final answer
     Returns: (thinking_content, answer_content, code_blocks)
     """
-    # Remove common loop warning
     loop_warning = ""
     if "[Loop detected and truncated]" in text:
         loop_warning = "\n\n⚠️ **Note**: Repetitive content was detected and removed"
         text = text.replace("⚠️ *[Loop detected and truncated]*", "")
-    # Try to find thinking section (common patterns)
     thinking_patterns = [
         r'<think>(.*?)</think>',
         r'<thinking>(.*?)</thinking>',
-        r'(?:Let me think|Let\'s think|Analysis):(.*?)(?=\n\n[SA-Z]|Solution:|Code:|```|\Z)',
     ]
     thinking_content = ""
     for pattern in thinking_patterns:
         match = re.search(pattern, text, re.DOTALL | re.IGNORECASE)
         if match:
             thinking_content = match.group(1).strip()
-            text = re.sub(pattern, '', text, flags=re.DOTALL | re.IGNORECASE)
             break
-    # If no explicit thinking, extract first paragraph if it's analytical
     if not thinking_content:
-        paragraphs = text.split('\n\n')
-        if paragraphs and len(paragraphs[0]) > 50 and len(paragraphs[0]) < 500:
-            first_para = paragraphs[0].lower()
-            if any(word in first_para for word in ['approach', 'step', 'first', 'algorithm', 'solution']):
-                thinking_content = paragraphs[0]
-                text = '\n\n'.join(paragraphs[1:])
-    # Extract code blocks
     code_pattern = r'```(\w+)?\n(.*?)```'
-    code_blocks = re.findall(code_pattern, text, re.DOTALL)
-    # Extract final answer (boxed or explicit)
-    answer_match = re.search(r'\\boxed\{([^}]+)\}', text)
     if answer_match:
-        answer_content = f"**Final Answer:** {answer_match.group(1)}"
     else:
-        answer_content = text.strip()
-    answer_content += loop_warning
-    return thinking_content, answer_content, code_blocks
 def format_output_html(thinking, answer, code_blocks, prompt_tokens, completion_tokens, generation_time):
     """
-    Format output as styled HTML with good contrast and modern design
     """
     total_tokens = prompt_tokens + completion_tokens
     thinking_tokens_est = len(thinking.split()) * 1.3 if thinking else 0
     tokens_per_sec = completion_tokens / generation_time if generation_time > 0 else 0
-    # Build thinking section HTML if exists
     thinking_html = ""
     if thinking:
         thinking_html = f"""
         <details style="background: #f8f9fa; border: 2px solid #e9ecef; border-radius: 12px; padding: 20px; margin-bottom: 24px; box-shadow: 0 2px 4px rgba(0,0,0,0.05);">
             <summary style="cursor: pointer; font-weight: 600; font-size: 16px; color: #495057; user-select: none; display: flex; align-items: center; gap: 8px;">
@@ -202,29 +214,42 @@ def format_output_html(thinking, answer, code_blocks, prompt_tokens, completion_
                 <span style="margin-left: auto; font-size: 12px; color: #6c757d;">Click to expand/collapse</span>
             </summary>
             <div style="margin-top: 16px; padding-top: 16px; border-top: 1px solid #dee2e6; color: #212529; line-height: 1.7; white-space: pre-wrap; font-size: 14px; font-family: 'SF Mono', Monaco, Consolas, monospace; background: #ffffff; padding: 16px; border-radius: 8px;">
-{thinking}
             </div>
         </details>
         """
-    # Build code blocks HTML if exist
     code_html = ""
     if code_blocks:
         code_blocks_html = ""
-        for lang, code in code_blocks:
             lang_display = lang if lang else "code"
             code_blocks_html += f"""
             <div style="margin-bottom: 16px; background: #1e1e1e; border-radius: 12px; overflow: hidden; box-shadow: 0 4px 6px rgba(0,0,0,0.1);">
                 <div style="background: #2d2d2d; padding: 12px 20px; color: #ffffff; font-weight: 600; font-size: 13px; display: flex; justify-content: space-between; align-items: center; border-bottom: 1px solid #3d3d3d;">
                     <span>{lang_display}</span>
-                    <button onclick="navigator.clipboard.writeText(this.parentElement.nextElementSibling.textContent)"
-                            style="background: #4CAF50; color: white; border: none; padding: 6px 14px; border-radius: 6px; cursor: pointer; font-size: 12px; font-weight: 500; transition: background 0.2s;"
-                            onmouseover="this.style.background='#45a049'"
-                            onmouseout="this.style.background='#4CAF50'">
-                        📋 Copy
-                    </button>
                 </div>
-                <pre style="margin: 0; padding: 20px; color: #d4d4d4; overflow-x: auto; font-family: 'SF Mono', Monaco, Consolas, monospace; font-size: 14px; line-height: 1.6;"><code>{code.strip()}</code></pre>
             </div>
             """
@@ -235,6 +260,35 @@ def format_output_html(thinking, answer, code_blocks, prompt_tokens, completion_
             </h3>
             {code_blocks_html}
         </div>
         """
     html = f"""
@@ -271,7 +325,7 @@ def format_output_html(thinking, answer, code_blocks, prompt_tokens, completion_
             </div>
         </div>
-        <!-- Thinking Section -->
         {thinking_html}
         <!-- Answer Section -->
@@ -284,7 +338,7 @@ def format_output_html(thinking, answer, code_blocks, prompt_tokens, completion_
             </div>
         </div>
-        <!-- Code Blocks -->
         {code_html}
     </div>
@@ -292,18 +346,16 @@ def format_output_html(thinking, answer, code_blocks, prompt_tokens, completion_
     return html
 # Initialize model
-print("🔄 Initializing VibeThinker with vLLM (T4-optimized)...")
 vibe_model = VibeThinkerVLLM()
-def generate_solution(prompt, temperature=0.6, max_tokens=8192, max_thinking_tokens=4096, progress=gr.Progress()):
     """Generate and format solution with progress tracking"""
     if not prompt.strip():
         return "<p style='color: #dc3545; font-size: 16px; padding: 20px;'>⚠️ Please enter a problem to solve.</p>"
-    # Show progress
     progress(0, desc="🔄 Initializing generation...")
-    # Generate response
     progress(0.3, desc="🧠 Model is thinking...")
     response, prompt_tokens, completion_tokens, gen_time = vibe_model.generate_response(
         prompt,
@@ -314,7 +366,7 @@ def generate_solution(prompt, temperature=0.6, max_tokens=8192, max_thinking_tok
     progress(0.8, desc="📝 Formatting output...")
-    # Parse output
     thinking, answer, code_blocks = parse_model_output(response)
     # Format as HTML
@@ -341,7 +393,7 @@ with gr.Blocks(
     **Optimized for**: Competitive programming (LeetCode, Codeforces, AtCoder) and algorithm challenges
-    ⚡ **Powered by vLLM** | 🎯 **Best for**: Python algorithmic problems with clear input/output specs
     ⚠️ **Note**: This model is specialized for competitive programming, not general software development
     """)
@@ -364,10 +416,10 @@ with gr.Blocks(
                 )
                 max_tokens_slider = gr.Slider(
                     minimum=1024,
-                    maximum=16384,
-                    value=8192,
                     step=1024,
-                    label="📝 Max Total Tokens"
                 )
                 max_thinking_slider = gr.Slider(
                     minimum=512,
@@ -383,6 +435,7 @@ with gr.Blocks(
                 - Higher thinking tokens (4096-8192) for complex problems requiring detailed reasoning
                 - Temperature 0.6 balances creativity and accuracy
                 - Loop detection is automatic - repetitive output will be truncated
                 """)
             generate_btn = gr.Button("🚀 Generate Solution", variant="primary", size="lg")

                 model="WeiboAI/VibeThinker-1.5B",
                 dtype="float16",  # Use float16 instead of bfloat16 for T4
                 gpu_memory_utilization=0.85,
+                max_model_len=40960,  # Full 40K context as per docs
                 enforce_eager=True,  # Disable CUDA graphs for T4
                 disable_custom_all_reduce=True,  # Avoid custom kernels
                 enable_prefix_caching=False,  # Disable for stability
                 max_num_seqs=1,  # Process one sequence at a time
                 trust_remote_code=True
             )
+            print("✅ vLLM model loaded successfully with T4-compatible settings (40K context)")
         except Exception as e:
             print(f"❌ Error loading model: {e}")
             raise
+    def generate_response(self, prompt, temperature=0.6, max_tokens=16384, max_thinking_tokens=4096):
         """
         Generate response with thinking length control and loop detection
     def _detect_loop(self, text):
         """Detect if text contains repetitive loops"""
         words = text.split()
         if len(words) < 20:
             return False
                 rest_start = i + length
                 rest = ' '.join(words[rest_start:])
                 if phrase in rest:
                     return ' '.join(words[:rest_start])
         return text
 def parse_model_output(text):
     """
     Parse model output to separate thinking and final answer
+    ONLY extract code from the final answer section, not from thinking
     Returns: (thinking_content, answer_content, code_blocks)
     """
     loop_warning = ""
     if "[Loop detected and truncated]" in text:
         loop_warning = "\n\n⚠️ **Note**: Repetitive content was detected and removed"
         text = text.replace("⚠️ *[Loop detected and truncated]*", "")
+    # Try to find explicit thinking delimiters
     thinking_patterns = [
         r'<think>(.*?)</think>',
         r'<thinking>(.*?)</thinking>',
     ]
     thinking_content = ""
+    answer_content = text
     for pattern in thinking_patterns:
         match = re.search(pattern, text, re.DOTALL | re.IGNORECASE)
         if match:
             thinking_content = match.group(1).strip()
+            answer_content = re.sub(pattern, '', text, flags=re.DOTALL | re.IGNORECASE).strip()
             break
+    # If no explicit thinking tags, try to detect reasoning section
+    # Look for a natural break like "Solution:" or "Here's the code:"
     if not thinking_content:
+        split_markers = [
+            r'(.*?)(?=\n\n(?:Solution|Here\'s|Implementation|Code|Final).*?:)',
+            r'(.*?)(?=\n\n```)',  # Before first code block
+        ]
+        for pattern in split_markers:
+            match = re.search(pattern, text, re.DOTALL | re.IGNORECASE)
+            if match:
+                potential_thinking = match.group(1).strip()
+                # Only treat as thinking if it's substantial (>100 chars) and contains reasoning keywords
+                if len(potential_thinking) > 100:
+                    thinking_lower = potential_thinking.lower()
+                    if any(word in thinking_lower for word in ['step', 'approach', 'idea', 'first', 'we can', 'let\'s']):
+                        thinking_content = potential_thinking
+                        answer_content = text[len(potential_thinking):].strip()
+                        break
+    # NOW extract code blocks ONLY from answer_content (not from thinking)
     code_pattern = r'```(\w+)?\n(.*?)```'
+    code_blocks = re.findall(code_pattern, answer_content, re.DOTALL)
+    # Extract final answer (boxed or use answer_content as-is)
+    answer_match = re.search(r'\\boxed\{([^}]+)\}', answer_content)
     if answer_match:
+        final_answer = f"**Final Answer:** {answer_match.group(1)}"
     else:
+        final_answer = answer_content
+    final_answer += loop_warning
+    return thinking_content, final_answer, code_blocks
 def format_output_html(thinking, answer, code_blocks, prompt_tokens, completion_tokens, generation_time):
     """
+    Format output as styled HTML - thinking is plain text, code blocks are from final answer only
     """
     total_tokens = prompt_tokens + completion_tokens
     thinking_tokens_est = len(thinking.split()) * 1.3 if thinking else 0
     tokens_per_sec = completion_tokens / generation_time if generation_time > 0 else 0
+    # Build thinking section HTML - PLAIN TEXT, NO CODE PARSING
     thinking_html = ""
     if thinking:
+        # Escape any HTML in thinking to prevent rendering
+        thinking_escaped = thinking.replace('<', '&lt;').replace('>', '&gt;')
         thinking_html = f"""
         <details style="background: #f8f9fa; border: 2px solid #e9ecef; border-radius: 12px; padding: 20px; margin-bottom: 24px; box-shadow: 0 2px 4px rgba(0,0,0,0.05);">
             <summary style="cursor: pointer; font-weight: 600; font-size: 16px; color: #495057; user-select: none; display: flex; align-items: center; gap: 8px;">
                 <span style="margin-left: auto; font-size: 12px; color: #6c757d;">Click to expand/collapse</span>
             </summary>
             <div style="margin-top: 16px; padding-top: 16px; border-top: 1px solid #dee2e6; color: #212529; line-height: 1.7; white-space: pre-wrap; font-size: 14px; font-family: 'SF Mono', Monaco, Consolas, monospace; background: #ffffff; padding: 16px; border-radius: 8px;">
+{thinking_escaped}
             </div>
         </details>
         """
+    # Build code blocks HTML - ONLY from final answer
     code_html = ""
     if code_blocks:
         code_blocks_html = ""
+        for idx, (lang, code) in enumerate(code_blocks):
             lang_display = lang if lang else "code"
+            code_id = f"code_{idx}"
+            # Create downloadable version
+            code_clean = code.strip()
             code_blocks_html += f"""
             <div style="margin-bottom: 16px; background: #1e1e1e; border-radius: 12px; overflow: hidden; box-shadow: 0 4px 6px rgba(0,0,0,0.1);">
                 <div style="background: #2d2d2d; padding: 12px 20px; color: #ffffff; font-weight: 600; font-size: 13px; display: flex; justify-content: space-between; align-items: center; border-bottom: 1px solid #3d3d3d;">
                     <span>{lang_display}</span>
+                    <div style="display: flex; gap: 8px;">
+                        <button onclick="navigator.clipboard.writeText(document.getElementById('{code_id}').textContent)"
+                                style="background: #4CAF50; color: white; border: none; padding: 6px 14px; border-radius: 6px; cursor: pointer; font-size: 12px; font-weight: 500; transition: background 0.2s;"
+                                onmouseover="this.style.background='#45a049'"
+                                onmouseout="this.style.background='#4CAF50'">
+                            📋 Copy
+                        </button>
+                        <button onclick="downloadCode(document.getElementById('{code_id}').textContent, '{lang_display}')"
+                                style="background: #2196F3; color: white; border: none; padding: 6px 14px; border-radius: 6px; cursor: pointer; font-size: 12px; font-weight: 500; transition: background 0.2s;"
+                                onmouseover="this.style.background='#0b7dda'"
+                                onmouseout="this.style.background='#2196F3'">
+                            💾 Download
+                        </button>
+                    </div>
                 </div>
+                <pre style="margin: 0; padding: 20px; color: #d4d4d4; overflow-x: auto; font-family: 'SF Mono', Monaco, Consolas, monospace; font-size: 14px; line-height: 1.6;"><code id="{code_id}">{code_clean}</code></pre>
             </div>
             """
             </h3>
             {code_blocks_html}
         </div>
+        <script>
+        function downloadCode(code, lang) {{
+            const extensions = {{
+                'python': 'py',
+                'javascript': 'js',
+                'java': 'java',
+                'cpp': 'cpp',
+                'c': 'c',
+                'html': 'html',
+                'css': 'css',
+                'typescript': 'ts',
+                'rust': 'rs',
+                'go': 'go',
+            }};
+            const ext = extensions[lang.toLowerCase()] || 'txt';
+            const filename = `solution.${{ext}}`;
+            const blob = new Blob([code], {{ type: 'text/plain' }});
+            const url = window.URL.createObjectURL(blob);
+            const a = document.createElement('a');
+            a.href = url;
+            a.download = filename;
+            document.body.appendChild(a);
+            a.click();
+            document.body.removeChild(a);
+            window.URL.revokeObjectURL(url);
+        }}
+        </script>
         """
     html = f"""
             </div>
         </div>
+        <!-- Thinking Section (Plain Text Only) -->
         {thinking_html}
         <!-- Answer Section -->
             </div>
         </div>
+        <!-- Code Blocks (From Final Answer Only) -->
         {code_html}
     </div>
     return html
 # Initialize model
+print("🔄 Initializing VibeThinker with vLLM (T4-optimized, 40K context)...")
 vibe_model = VibeThinkerVLLM()
+def generate_solution(prompt, temperature=0.6, max_tokens=16384, max_thinking_tokens=4096, progress=gr.Progress()):
     """Generate and format solution with progress tracking"""
     if not prompt.strip():
         return "<p style='color: #dc3545; font-size: 16px; padding: 20px;'>⚠️ Please enter a problem to solve.</p>"
     progress(0, desc="🔄 Initializing generation...")
     progress(0.3, desc="🧠 Model is thinking...")
     response, prompt_tokens, completion_tokens, gen_time = vibe_model.generate_response(
         prompt,
     progress(0.8, desc="📝 Formatting output...")
+    # Parse output - thinking stays as plain text, code only from answer
     thinking, answer, code_blocks = parse_model_output(response)
     # Format as HTML
     **Optimized for**: Competitive programming (LeetCode, Codeforces, AtCoder) and algorithm challenges
+    ⚡ **Powered by vLLM** (40K context) | 🎯 **Best for**: Python algorithmic problems with clear input/output specs
     ⚠️ **Note**: This model is specialized for competitive programming, not general software development
     """)
                 )
                 max_tokens_slider = gr.Slider(
                     minimum=1024,
+                    maximum=40960,
+                    value=16384,
                     step=1024,
+                    label="📝 Max Total Tokens (40K max)"
                 )
                 max_thinking_slider = gr.Slider(
                     minimum=512,
                 - Higher thinking tokens (4096-8192) for complex problems requiring detailed reasoning
                 - Temperature 0.6 balances creativity and accuracy
                 - Loop detection is automatic - repetitive output will be truncated
+                - Code blocks shown are from final solution only (not from reasoning process)
                 """)
             generate_btn = gr.Button("🚀 Generate Solution", variant="primary", size="lg")