Spaces:

WellGoods
/

VibeThinker

Running on T4

App Files Files Community

VladBoyko committed 24 days ago

Commit

45d3861

verified ·

1 Parent(s): 6cad808

Update app.py

Browse files

Files changed (1) hide show

app.py +193 -260

app.py CHANGED Viewed

@@ -12,7 +12,7 @@ class VibeThinkerVLLM:
             model=self.model_path,
             dtype="bfloat16",
             gpu_memory_utilization=0.9,
-            max_model_len=40960,  # Support full context length
             trust_remote_code=True
         )
@@ -30,7 +30,7 @@ class VibeThinkerVLLM:
             temperature=temperature,
             max_tokens=max_tokens,
             top_p=top_p,
-            top_k=-1,  # Disable top_k sampling
         )
         print(f"Generating with vLLM (temp={temperature}, max_tokens={max_tokens})...")
@@ -43,66 +43,24 @@ class VibeThinkerVLLM:
 def parse_model_output(text):
     """
-    Parse model output into structured components:
-    - Thinking sections (within <think> tags)
-    - Regular text (chat messages)
-    - Code blocks (within ``` or <code> tags)
     """
     sections = []
-    # Split by <think> tags
     think_pattern = r'<think>(.*?)</think>'
     code_pattern = r'```(\w+)?\n(.*?)```'
     # Extract thinking sections
     think_matches = list(re.finditer(think_pattern, text, re.DOTALL))
-    # Track positions
     last_pos = 0
     for match in think_matches:
-        # Add text before thinking section
         before_text = text[last_pos:match.start()].strip()
         if before_text:
-            # Check for code blocks in this text
-            code_blocks = list(re.finditer(code_pattern, before_text, re.DOTALL))
-            if code_blocks:
-                # Process text with code blocks
-                text_pos = 0
-                for code_match in code_blocks:
-                    # Add text before code
-                    pre_code_text = before_text[text_pos:code_match.start()].strip()
-                    if pre_code_text:
-                        sections.append({
-                            'type': 'text',
-                            'content': pre_code_text
-                        })
-                    # Add code block
-                    language = code_match.group(1) or 'plaintext'
-                    code_content = code_match.group(2).strip()
-                    sections.append({
-                        'type': 'code',
-                        'language': language,
-                        'content': code_content
-                    })
-                    text_pos = code_match.end()
-                # Add remaining text after last code block
-                remaining_text = before_text[text_pos:].strip()
-                if remaining_text:
-                    sections.append({
-                        'type': 'text',
-                        'content': remaining_text
-                    })
-            else:
-                sections.append({
-                    'type': 'text',
-                    'content': before_text
-                })
         # Add thinking section
         think_content = match.group(1).strip()
@@ -113,164 +71,68 @@ def parse_model_output(text):
         last_pos = match.end()
-    # Add remaining text after last thinking section
     remaining = text[last_pos:].strip()
     if remaining:
-        # Check for code blocks
-        code_blocks = list(re.finditer(code_pattern, remaining, re.DOTALL))
-        if code_blocks:
-            text_pos = 0
-            for code_match in code_blocks:
-                pre_code_text = remaining[text_pos:code_match.start()].strip()
-                if pre_code_text:
-                    sections.append({
-                        'type': 'text',
-                        'content': pre_code_text
-                    })
-                language = code_match.group(1) or 'plaintext'
-                code_content = code_match.group(2).strip()
-                sections.append({
-                    'type': 'code',
-                    'language': language,
-                    'content': code_content
-                })
-                text_pos = code_match.end()
-            remaining_text = remaining[text_pos:].strip()
-            if remaining_text:
-                sections.append({
-                    'type': 'text',
-                    'content': remaining_text
-                })
-        else:
-            sections.append({
-                'type': 'text',
-                'content': remaining
-            })
     return sections
-def format_output_for_display(sections):
-    """
-    Format parsed sections into a rich HTML display with:
-    - Collapsible thinking sections
-    - Syntax-highlighted code blocks
-    - Clean text rendering
-    """
-    html_parts = []
-    for i, section in enumerate(sections):
-        if section['type'] == 'thinking':
-            # Collapsible thinking section
-            html_parts.append(f"""
-            <details class="thinking-section" style="margin: 15px 0; border: 2px solid #f39c12; border-radius: 8px; background-color: #fff9e6;">
-                <summary style="padding: 12px; cursor: pointer; font-weight: bold; color: #d68910; user-select: none;">
-                    🤔 Thinking Process (Click to expand)
-                </summary>
-                <div style="padding: 15px; border-top: 1px solid #f39c12; background-color: #fffef7; white-space: pre-wrap; font-family: 'Courier New', monospace; font-size: 13px; color: #333; line-height: 1.6;">
-{section['content']}
-                </div>
-            </details>
-            """)
-        elif section['type'] == 'code':
-            # Code block with copy/download buttons
-            code_id = f"code-{i}"
-            html_parts.append(f"""
-            <details class="code-section" open style="margin: 15px 0; border: 2px solid #3498db; border-radius: 8px; background-color: #e8f4fd;">
-                <summary style="padding: 12px; cursor: pointer; font-weight: bold; color: #2874a6; user-select: none;">
-                    💻 Code ({section['language']}) - Click to collapse
-                </summary>
-                <div style="position: relative; padding: 0;">
-                    <div style="position: absolute; top: 10px; right: 10px; z-index: 10;">
-                        <button onclick="copyCode('{code_id}')" style="padding: 6px 12px; margin-right: 5px; background-color: #3498db; color: white; border: none; border-radius: 4px; cursor: pointer; font-size: 12px;">
-                            📋 Copy
-                        </button>
-                        <button onclick="downloadCode('{code_id}', '{section['language']}')" style="padding: 6px 12px; background-color: #27ae60; color: white; border: none; border-radius: 4px; cursor: pointer; font-size: 12px;">
-                            ⬇️ Download
-                        </button>
-                    </div>
-                    <pre id="{code_id}" style="margin: 0; padding: 40px 15px 15px 15px; background-color: #f8f9fa; border-top: 1px solid #3498db; overflow-x: auto; font-family: 'Courier New', monospace; font-size: 13px; line-height: 1.5;"><code class="language-{section['language']}">{section['content']}</code></pre>
-                </div>
-            </details>
-            """)
-        else:  # text
-            # Regular text output
-            html_parts.append(f"""
-            <div class="text-section" style="margin: 15px 0; padding: 15px; border: 1px solid #bdc3c7; border-radius: 8px; background-color: #ffffff; white-space: pre-wrap; font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif; font-size: 14px; line-height: 1.8; color: #2c3e50;">
-{section['content']}
-            </div>
-            """)
-    # Add JavaScript for copy and download functionality
-    js_code = """
-    <script>
-    function copyCode(elementId) {
-        const codeElement = document.getElementById(elementId);
-        const code = codeElement.textContent;
-        navigator.clipboard.writeText(code).then(() => {
-            alert('Code copied to clipboard!');
-        }).catch(err => {
-            console.error('Failed to copy:', err);
-        });
-    }
-    function downloadCode(elementId, language) {
-        const codeElement = document.getElementById(elementId);
-        const code = codeElement.textContent;
-        // Determine file extension
-        const extensions = {
-            'python': 'py',
-            'javascript': 'js',
-            'typescript': 'ts',
-            'html': 'html',
-            'css': 'css',
-            'java': 'java',
-            'cpp': 'cpp',
-            'c': 'c',
-            'ruby': 'rb',
-            'go': 'go',
-            'rust': 'rs',
-            'swift': 'swift',
-            'kotlin': 'kt',
-            'plaintext': 'txt'
-        };
-        const ext = extensions[language.toLowerCase()] || 'txt';
-        const filename = `code_snippet.${ext}`;
-        // Create blob and download
-        const blob = new Blob([code], { type: 'text/plain' });
-        const url = window.URL.createObjectURL(blob);
-        const a = document.createElement('a');
-        a.href = url;
-        a.download = filename;
-        document.body.appendChild(a);
-        a.click();
-        document.body.removeChild(a);
-        window.URL.revokeObjectURL(url);
-    }
-    </script>
-    """
-    return js_code + "\n".join(html_parts)
 # Initialize model
 print("Initializing VibeThinker-1.5B with vLLM...")
 model = VibeThinkerVLLM()
-# Create Gradio interface
 def generate_response(prompt, temperature, max_tokens, top_p):
     if not prompt.strip():
-        return "<p style='color: red;'>Please enter a question.</p>"
     try:
         # Generate raw response
@@ -281,64 +143,102 @@ def generate_response(prompt, temperature, max_tokens, top_p):
             top_p=top_p
         )
-        # Parse and format the response
         sections = parse_model_output(raw_response)
-        formatted_html = format_output_for_display(sections)
-        return formatted_html
     except Exception as e:
-        return f"<p style='color: red;'><strong>Error:</strong> {str(e)}</p>"
-# Custom CSS for better styling
-custom_css = """
-.thinking-section summary:hover {
-    background-color: #fef5e7;
-}
-.code-section summary:hover {
-    background-color: #d6eaf8;
-}
-.text-section {
-    box-shadow: 0 2px 4px rgba(0,0,0,0.05);
-}
-details[open] summary {
-    border-bottom: 1px solid #ddd;
-    margin-bottom: 10px;
-}
-/* Syntax highlighting enhancements */
-code {
-    font-family: 'Fira Code', 'Courier New', monospace;
-}
-"""
-# Gradio UI
-with gr.Blocks(title="VibeThinker-1.5B Advanced", css=custom_css) as demo:
     gr.Markdown("""
     # 🧠 VibeThinker-1.5B: Advanced Reasoning Interface
-    **Optimized with vLLM** for 10-20x faster inference! ⚡
-    **Features**:
-    - 🤔 **Collapsible Thinking Sections**: See the model's reasoning process
-    - 💻 **Interactive Code Blocks**: Copy or download code snippets
-    - 📝 **Clean Text Display**: Easy-to-read formatted responses
-    **Best for**: Competitive math problems and algorithm coding challenges
-    [GitHub](https://github.com/WeiboAI/VibeThinker) | [HuggingFace Model](https://huggingface.co/WeiboAI/VibeThinker-1.5B) | [Paper](https://huggingface.co/papers/2511.06221)
     """)
     with gr.Row():
         with gr.Column(scale=1):
             prompt_input = gr.Textbox(
-                label="Your Question",
-                placeholder="Ask a math problem or coding challenge (in English)...",
-                lines=6
             )
             with gr.Accordion("⚙️ Advanced Settings", open=False):
@@ -347,7 +247,8 @@ with gr.Blocks(title="VibeThinker-1.5B Advanced", css=custom_css) as demo:
                     maximum=1.5,
                     value=0.6,
                     step=0.1,
-                    label="Temperature (0.6 or 1.0 recommended)"
                 )
                 max_tokens_slider = gr.Slider(
@@ -355,7 +256,8 @@ with gr.Blocks(title="VibeThinker-1.5B Advanced", css=custom_css) as demo:
                     maximum=40960,
                     value=8192,
                     step=512,
-                    label="Max Tokens"
                 )
                 top_p_slider = gr.Slider(
@@ -363,58 +265,89 @@ with gr.Blocks(title="VibeThinker-1.5B Advanced", css=custom_css) as demo:
                     maximum=1.0,
                     value=0.95,
                     step=0.05,
-                    label="Top P"
                 )
-            submit_btn = gr.Button("🚀 Generate Solution", variant="primary", size="lg")
-            clear_btn = gr.Button("🗑️ Clear", size="sm")
         with gr.Column(scale=1):
-            output_html = gr.HTML(
-                label="Model Response",
-                value="<p style='color: #7f8c8d; text-align: center; padding: 40px;'>Your response will appear here...</p>"
-            )
-    # Example questions
     gr.Examples(
         examples=[
-            ["Make me a single page html application that takes a color and outputs a color theme based on that color", 0.6, 16384, 0.95],
-            ["Solve this AIME problem: Find the number of positive integers n ≤ 1000 such that n^2 + n + 41 is prime.", 0.6, 12288, 0.95],
-            ["Write a Python function to implement the Euclidean algorithm for finding GCD, then optimize it.", 0.6, 8192, 0.95],
-            ["Prove that the sum of the first n odd numbers equals n^2 using mathematical induction.", 0.6, 8192, 0.95],
         ],
         inputs=[prompt_input, temperature_slider, max_tokens_slider, top_p_slider],
-        label="📚 Example Problems"
     )
     # Event handlers
     submit_btn.click(
         fn=generate_response,
         inputs=[prompt_input, temperature_slider, max_tokens_slider, top_p_slider],
-        outputs=output_html
     )
     clear_btn.click(
-        fn=lambda: ("", "<p style='color: #7f8c8d; text-align: center; padding: 40px;'>Your response will appear here...</p>"),
         inputs=[],
-        outputs=[prompt_input, output_html]
     )
-    gr.Markdown("""
-    ---
-    ### 📊 Performance Comparison:
-    | Metric | VibeThinker-1.5B | DeepSeek R1 (671B) | Size Ratio |
-    |--------|------------------|---------------------|------------|
-    | AIME24 | **80.3** | 79.8 | **400× smaller** |
-    | AIME25 | **74.4** | 70.0 | **400× smaller** |
-    | HMMT25 | **50.4** | 41.7 | **400× smaller** |
-    | Training Cost | **$7,800** | $294,000+ | **40× cheaper** |
-    🚀 **Powered by vLLM** for ultra-fast inference on T4 GPUs
-    """)
-# Launch the app
 if __name__ == "__main__":
-    demo.queue(max_size=20)
-    demo.launch(share=False)

             model=self.model_path,
             dtype="bfloat16",
             gpu_memory_utilization=0.9,
+            max_model_len=40960,
             trust_remote_code=True
         )
             temperature=temperature,
             max_tokens=max_tokens,
             top_p=top_p,
+            top_k=-1,
         )
         print(f"Generating with vLLM (temp={temperature}, max_tokens={max_tokens})...")
 def parse_model_output(text):
     """
+    Parse model output into structured components using Gradio 5 native components
     """
     sections = []
+    # Patterns
     think_pattern = r'<think>(.*?)</think>'
     code_pattern = r'```(\w+)?\n(.*?)```'
     # Extract thinking sections
     think_matches = list(re.finditer(think_pattern, text, re.DOTALL))
     last_pos = 0
     for match in think_matches:
+        # Process text before thinking section
         before_text = text[last_pos:match.start()].strip()
         if before_text:
+            sections.extend(parse_text_with_code(before_text))
         # Add thinking section
         think_content = match.group(1).strip()
         last_pos = match.end()
+    # Process remaining text
     remaining = text[last_pos:].strip()
     if remaining:
+        sections.extend(parse_text_with_code(remaining))
     return sections
+def parse_text_with_code(text):  # Split `text` into an ordered list of {'type': 'text'|'code', ...} section dicts
+    """Helper function to parse text containing code blocks"""
+    sections = []
+    code_pattern = r'```(\w+)?\n(.*?)```'  # fence: optional \w+ language tag, a newline, then a lazily matched body up to the next ```
+    code_blocks = list(re.finditer(code_pattern, text, re.DOTALL))  # DOTALL lets the body span multiple lines
+    if not code_blocks:
+        return [{'type': 'text', 'content': text}]  # no fences: the whole input becomes one text section, returned as-is (not stripped here)
+    text_pos = 0  # offset just past the previously consumed fence
+    for code_match in code_blocks:
+        # Emit the prose between the previous fence and this one; whitespace-only gaps are dropped
+        pre_code_text = text[text_pos:code_match.start()].strip()
+        if pre_code_text:
+            sections.append({
+                'type': 'text',
+                'content': pre_code_text
+            })
+        # Emit the fenced code itself
+        # NOTE(review): a tag containing non-word characters (e.g. "c++") does not satisfy `(\w+)?\n`,
+        # so such a fence is not recognised at its opening marker - confirm whether that matters here
+        language = code_match.group(1) or 'python'  # fences without a tag are labelled 'python'
+        code_content = code_match.group(2).strip()  # surrounding whitespace of the body is removed
+        sections.append({
+            'type': 'code',
+            'language': language,
+            'content': code_content
+        })
+        text_pos = code_match.end()
+    # Emit any prose that follows the last fence
+    remaining_text = text[text_pos:].strip()
+    if remaining_text:
+        sections.append({
+            'type': 'text',
+            'content': remaining_text
+        })
+    return sections
 # Initialize model
 print("Initializing VibeThinker-1.5B with vLLM...")
 model = VibeThinkerVLLM()
 def generate_response(prompt, temperature, max_tokens, top_p):
+    """
+    Generate and parse response using Gradio 5 native components
+    Returns a list of components to render
+    """
     if not prompt.strip():
+        yield [gr.Markdown("⚠️ Please enter a question.")]
+        return
     try:
         # Generate raw response
             top_p=top_p
         )
+        # Parse the response
         sections = parse_model_output(raw_response)
+        # Build component list for Gradio 5
+        components = []
+        for i, section in enumerate(sections):
+            if section['type'] == 'thinking':
+                # Use Accordion for collapsible thinking
+                with gr.Accordion("🤔 Thinking Process", open=False):
+                    components.append(gr.Textbox(
+                        value=section['content'],
+                        lines=20,
+                        max_lines=50,
+                        show_label=False,
+                        container=False,
+                        interactive=False
+                    ))
+            elif section['type'] == 'code':
+                # Use native Code component in Gradio 5
+                with gr.Accordion(f"💻 Code ({section['language']})", open=True):
+                    components.append(gr.Code(
+                        value=section['content'],
+                        language=section['language'],
+                        lines=20,
+                        show_label=False,
+                        interactive=False
+                    ))
+            else:  # text
+                components.append(gr.Markdown(section['content']))
+        yield components
     except Exception as e:
+        yield [gr.Markdown(f"❌ **Error:** {str(e)}")]
+# Custom theme: Gradio's built-in Soft theme with project colours, fonts and a few variable overrides
+theme = gr.themes.Soft(
+    primary_hue="blue",
+    secondary_hue="purple",
+    neutral_hue="slate",
+    font=[gr.themes.GoogleFont("Inter"), "system-ui", "sans-serif"],  # Inter first, then generic fallbacks
+).set(
+    button_primary_background_fill="*primary_600",  # "*name" values presumably reference other theme variables - confirm against the theming docs
+    button_primary_background_fill_hover="*primary_700",
+    block_label_text_weight="600",
+    block_title_text_weight="700",
+)
+# Gradio 5 UI with modern components
+with gr.Blocks(
+    title="VibeThinker-1.5B Advanced",
+    theme=theme,
+    fill_height=False,
+    css="""
+    .thinking-box {
+        background-color: #fff9e6;
+        border: 2px solid #f39c12;
+        border-radius: 8px;
+        padding: 15px;
+        font-family: 'Courier New', monospace;
+    }
+    .code-box {
+        background-color: #e8f4fd;
+        border: 2px solid #3498db;
+        border-radius: 8px;
+    }
+    """,
+) as demo:
     gr.Markdown("""
     # 🧠 VibeThinker-1.5B: Advanced Reasoning Interface
+    **⚡ Powered by vLLM** for 10-20x faster inference on GPU!
+    ### ✨ Features:
+    - 🤔 **Collapsible Thinking Sections** - Explore the model's reasoning process
+    - 💻 **Syntax-Highlighted Code** - Native code display with copy functionality
+    - 📝 **Clean Markdown Output** - Beautiful formatting for text responses
+    **Best for:** Competitive math problems and algorithm coding challenges
+    [GitHub](https://github.com/WeiboAI/VibeThinker) | [Model](https://huggingface.co/WeiboAI/VibeThinker-1.5B) | [Paper](https://huggingface.co/papers/2511.06221)
     """)
     with gr.Row():
         with gr.Column(scale=1):
             prompt_input = gr.Textbox(
+                label="💬 Your Question",
+                placeholder="Ask a math problem or coding challenge (English works best)...",
+                lines=6,
+                max_lines=15
             )
             with gr.Accordion("⚙️ Advanced Settings", open=False):
                     maximum=1.5,
                     value=0.6,
                     step=0.1,
+                    label="🌡️ Temperature",
+                    info="0.6 or 1.0 recommended"
                 )
                 max_tokens_slider = gr.Slider(
                     maximum=40960,
                     value=8192,
                     step=512,
+                    label="📏 Max Tokens",
+                    info="Model supports up to 40,960 tokens"
                 )
                 top_p_slider = gr.Slider(
                     maximum=1.0,
                     value=0.95,
                     step=0.05,
+                    label="🎯 Top P",
+                    info="Nucleus sampling parameter"
                 )
+            with gr.Row():
+                submit_btn = gr.Button(
+                    "🚀 Generate Solution",
+                    variant="primary",
+                    scale=2
+                )
+                clear_btn = gr.Button(
+                    "🗑️ Clear",
+                    variant="secondary",
+                    scale=1
+                )
         with gr.Column(scale=1):
+            # Output column using render for dynamic components
+            output_column = gr.Column()
+            with output_column:
+                initial_state = gr.Markdown(
+                    """
+                    <div style='text-align: center; padding: 60px; color: #7f8c8d;'>
+                        <h3>👋 Ready to solve problems!</h3>
+                        <p>Enter your question and click Generate Solution</p>
+                    </div>
+                    """
+                )
+    # Example problems
     gr.Examples(
         examples=[
+            ["Make me a single page HTML application that takes a color and outputs a color theme", 0.6, 16384, 0.95],
+            ["Solve: Find the number of positive integers n ≤ 1000 such that n^2 + n + 41 is prime.", 0.6, 12288, 0.95],
+            ["Write an efficient Python implementation of the Sieve of Eratosthenes algorithm.", 0.6, 8192, 0.95],
+            ["Prove using mathematical induction that 1 + 2 + 3 + ... + n = n(n+1)/2", 0.6, 8192, 0.95],
         ],
         inputs=[prompt_input, temperature_slider, max_tokens_slider, top_p_slider],
+        label="📚 Example Problems",
+        examples_per_page=4
     )
+    gr.Markdown("""
+    ---
+    ### 📊 Performance Highlights:
+    | Benchmark | VibeThinker-1.5B | DeepSeek R1 (671B) | Advantage |
+    |-----------|------------------|---------------------|-----------|
+    | **AIME24** | **80.3** ✨ | 79.8 | 400× smaller! |
+    | **AIME25** | **74.4** ✨ | 70.0 | 400× smaller! |
+    | **HMMT25** | **50.4** ✨ | 41.7 | 400× smaller! |
+    | **Training Cost** | **$7,800** | $294,000+ | 40× cheaper! |
+    💡 **Powered by Spectrum-to-Signal Principle (SSP)** training framework
+    """)
     # Event handlers
+    def clear_interface():
+        return "", None
     submit_btn.click(
         fn=generate_response,
         inputs=[prompt_input, temperature_slider, max_tokens_slider, top_p_slider],
+        outputs=[output_column],
+        show_progress="full"
     )
     clear_btn.click(
+        fn=clear_interface,
         inputs=[],
+        outputs=[prompt_input, output_column]
     )
+# Script entry point: configure the request queue, then start the Gradio server
 if __name__ == "__main__":
+    demo.queue(
+        max_size=20,
+        default_concurrency_limit=10  # NOTE(review): all handlers call the single module-level `model` - confirm this limit suits it
+    )
+    demo.launch(
+        ssr_mode=True,  # Enable server-side rendering for faster loads
+        show_api=True,
+        show_error=True,
+    )