VladBoyko committed
Commit 8a46019 · verified · 1 Parent(s): ebe1956

Update app.py

Updated Output handling

Files changed (1)
  1. app.py +280 -478
app.py CHANGED
@@ -1,76 +1,97 @@
  import gradio as gr
  import torch
  from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
- from threading import Thread
  import re
  import time
  
  class VibeThinkerModel:
      def __init__(self):
          self.model = None
          self.tokenizer = None
          self.device = "cuda" if torch.cuda.is_available() else "cpu"
-         self.load_model()
- 
      def load_model(self):
-         """Load VibeThinker model with transformers"""
          try:
-             print("🔄 Loading VibeThinker-1.5B with transformers...")
- 
-             self.tokenizer = AutoTokenizer.from_pretrained(
-                 "WeiboAI/VibeThinker-1.5B",
-                 trust_remote_code=True
-             )
- 
              self.model = AutoModelForCausalLM.from_pretrained(
-                 "WeiboAI/VibeThinker-1.5B",
-                 torch_dtype=torch.float16,
                  device_map="auto",
-                 trust_remote_code=True
              )
  
-             print(f"✅ Model loaded successfully on {self.device}")
-             print(f"💾 Model memory: ~{self.model.get_memory_footprint() / 1e9:.2f} GB")
  
-         except Exception as e:
-             print(f"❌ Error loading model: {e}")
-             raise
  
-     def generate_response_streaming(self, prompt, temperature=0.6, max_new_tokens=8192, max_thinking_tokens=4096):
-         """Generate response with streaming and real-time loop detection"""
-         if not self.model or not self.tokenizer:
-             yield "Model not loaded!", None, False
-             return
  
          try:
              start_time = time.time()
  
-             formatted_prompt = f"""<|im_start|>system
- You are a competitive programming expert. Provide clear, concise solutions to coding problems.
- 
- Format your response as:
- 1. Brief analysis (2-3 sentences)
- 2. Solution approach
- 3. Implementation code
- 4. Test cases
- 
- Keep reasoning under {max_thinking_tokens} tokens. Be direct and avoid repetition.<|im_end|>
- <|im_start|>user
- {prompt}<|im_end|>
- <|im_start|>assistant
- """
- 
-             inputs = self.tokenizer(formatted_prompt, return_tensors="pt").to(self.device)
-             prompt_length = inputs.input_ids.shape[1]
  
-             # Create streamer
-             streamer = TextIteratorStreamer(
-                 self.tokenizer,
-                 skip_prompt=True,
-                 skip_special_tokens=True
-             )
  
-             # Generation kwargs
              generation_kwargs = dict(
                  **inputs,
                  max_new_tokens=max_new_tokens,
@@ -78,494 +99,275 @@ Keep reasoning under {max_thinking_tokens} tokens. Be direct and avoid repetitio
                  top_p=0.95,
                  top_k=50,
                  do_sample=True,
-                 repetition_penalty=1.1,
                  pad_token_id=self.tokenizer.eos_token_id,
                  streamer=streamer,
              )
  
-             # Start generation in background thread
              thread = Thread(target=self.model.generate, kwargs=generation_kwargs)
              thread.start()
  
-             # Stream tokens with real-time loop detection
              generated_text = ""
-             last_loop_check = ""
              loop_detected = False
  
              for new_text in streamer:
-                 if loop_detected:
-                     break  # Stop streaming if loop detected
  
                  generated_text += new_text
-                 generation_time = time.time() - start_time
-                 tokens_generated = len(self.tokenizer.encode(generated_text))
  
-                 # Check for loops every ~50 tokens
-                 if len(generated_text) - len(last_loop_check) > 200:
-                     if self._detect_loop_realtime(generated_text):
                          loop_detected = True
-                         # Truncate at loop point
-                         generated_text = self._truncate_loop(generated_text)
-                     last_loop_check = generated_text
- 
-                 # Yield current state
                  yield generated_text, {
-                     "prompt_tokens": prompt_length,
-                     "tokens_generated": tokens_generated,
-                     "generation_time": generation_time,
-                     "is_complete": False
-                 }, loop_detected
  
-             # Wait for thread to complete
-             thread.join()
- 
-             # Final yield
-             final_time = time.time() - start_time
-             final_tokens = len(self.tokenizer.encode(generated_text))
  
              yield generated_text, {
-                 "prompt_tokens": prompt_length,
-                 "completion_tokens": final_tokens,
-                 "generation_time": final_time,
-                 "tokens_per_sec": final_tokens / final_time if final_time > 0 else 0,
-                 "is_complete": True
-             }, loop_detected
  
          except Exception as e:
-             yield f"Error during generation: {str(e)}", None, False
- 
-     def _detect_loop_realtime(self, text, check_window=200, min_repetitions=5):
-         """Quick loop detection for real-time streaming.
- 
-         Real loops repeat SHORT phrases (3-9 words) MANY times (5+ repetitions).
-         Example: "Wait, let me reconsider... Wait, let me reconsider... Wait, let me reconsider..."
- 
-         Args:
-             text: Full generated text
-             check_window: Number of recent words to check (default: 200)
-             min_repetitions: Minimum repetitions to consider a loop (default: 5)
- 
-         Returns:
-             bool: True if a loop is detected
-         """
-         words = text.split()
-         if len(words) < 30:
-             return False
- 
-         # Check last N words for repetitive patterns
-         recent_words = words[-check_window:] if len(words) > check_window else words
-         text_to_check = ' '.join(recent_words)
- 
-         # Look for short phrases (3-9 words) repeated multiple times
-         for phrase_len in range(3, 10):  # 3 to 9 words
-             if len(recent_words) < phrase_len * min_repetitions:
-                 continue
- 
-             # Check different starting positions
-             for i in range(len(recent_words) - phrase_len):
-                 phrase = ' '.join(recent_words[i:i+phrase_len])
- 
-                 # Count how many times this phrase appears
-                 count = text_to_check.count(phrase)
- 
-                 # If phrase appears 5+ times, it's a loop
-                 if count >= min_repetitions:
-                     return True
- 
-         return False
- 
-     def _truncate_loop(self, text, min_repetitions=5):
-         """Truncate text at the point where loop starts.
- 
-         Find where a 3-9 word phrase starts repeating 5+ times and truncate there.
-         """
-         words = text.split()
- 
-         # Check for short phrases (3-9 words) repeated multiple times
-         for phrase_len in range(3, 10):
-             if len(words) < phrase_len * min_repetitions:
-                 continue
- 
-             # Scan through text to find loop start point
-             for i in range(len(words) - phrase_len * min_repetitions):
-                 phrase = ' '.join(words[i:i+phrase_len])
- 
-                 # Count consecutive repetitions starting from position i
-                 repetition_count = 0
-                 check_pos = i
- 
-                 while check_pos + phrase_len <= len(words):
-                     check_phrase = ' '.join(words[check_pos:check_pos+phrase_len])
-                     if check_phrase == phrase:
-                         repetition_count += 1
-                         check_pos += phrase_len
-                     else:
-                         break
- 
-                 # If we found 5+ consecutive repetitions, truncate at loop start
-                 if repetition_count >= min_repetitions:
-                     return ' '.join(words[:i])
- 
-         # If no clear loop found, return original
-         return text
  
- # Initialize model
- print("🔄 Initializing VibeThinker-1.5B...")
  vibe_model = VibeThinkerModel()
  
- class IntelligentStreamParser:
-     """Parse streaming output in real-time into sections"""
- 
-     def __init__(self):
-         self.reset()
- 
-     def reset(self):
-         self.thinking = ""
-         self.explanation = ""
-         self.code_blocks = []
-         self.current_code_lang = None
-         self.current_code_content = ""
-         self.in_code_block = False
-         self.phase = "thinking"  # thinking -> explanation -> code
  
-     def parse_chunk(self, full_text):
-         """Parse text in real-time as it streams"""
  
-         # Detect code blocks with regex
-         code_pattern = r'```(\w+)?\n(.*?)```'
-         found_codes = re.findall(code_pattern, full_text, re.DOTALL)
  
-         # Remove code blocks from text for section detection
-         text_without_code = re.sub(code_pattern, '###CODE_PLACEHOLDER###', full_text, flags=re.DOTALL)
  
-         # Try to split thinking and explanation
-         thinking_content = ""
-         explanation_content = text_without_code
  
-         # Check for explicit markers
-         if "Solution:" in text_without_code or "Explanation:" in text_without_code:
-             parts = re.split(r'(?:Solution|Explanation):', text_without_code, maxsplit=1)
-             if len(parts) == 2:
-                 thinking_content = parts[0].strip()
-                 explanation_content = parts[1].strip()
-         elif "```" in text_without_code:
-             # Split at first code block
-             parts = text_without_code.split("###CODE_PLACEHOLDER###", maxsplit=1)
-             if len(parts) == 2 and len(parts[0]) > 100:
-                 # Check if first part looks like thinking
-                 first_part_lower = parts[0].lower()
-                 thinking_keywords = ['approach', 'idea', 'step', 'first', "let's", 'plan', 'strategy']
-                 if any(kw in first_part_lower for kw in thinking_keywords):
-                     thinking_content = parts[0].strip()
-                     explanation_content = parts[1].strip()
  
-         # Clean up placeholders
-         explanation_content = explanation_content.replace('###CODE_PLACEHOLDER###', '').strip()
  
-         return {
-             'thinking': thinking_content,
-             'explanation': explanation_content,
-             'code_blocks': found_codes
-         }
- 
- parser = IntelligentStreamParser()
- 
- def format_streaming_html(generated_text, stats, loop_detected, is_generating=True):
-     """Format streaming output with intelligent parsing"""
- 
-     # Parse the current text
-     parsed = parser.parse_chunk(generated_text)
- 
-     thinking = parsed['thinking']
-     explanation = parsed['explanation']
-     code_blocks = parsed['code_blocks']
- 
-     # Stats
-     if stats:
-         prompt_tokens = stats.get('prompt_tokens', 0)
-         tokens_generated = stats.get('tokens_generated', 0) or stats.get('completion_tokens', 0)
-         generation_time = stats.get('generation_time', 0)
-         tokens_per_sec = stats.get('tokens_per_sec', 0) or (tokens_generated / generation_time if generation_time > 0 else 0)
-         is_complete = stats.get('is_complete', False)
-     else:
-         prompt_tokens = tokens_generated = generation_time = tokens_per_sec = 0
-         is_complete = False
- 
-     thinking_tokens_est = len(thinking.split()) * 1.3 if thinking else 0
-     total_tokens = prompt_tokens + tokens_generated
- 
-     # Card style
-     card_base_style = "background: #ffffff; border-radius: 12px; padding: 24px; margin-bottom: 20px; box-shadow: 0 2px 8px rgba(0,0,0,0.08);"
- 
-     # Blink cursor CSS
-     cursor_style = """
-     <style>
-     @keyframes blink {
-         0%, 49% { opacity: 1; }
-         50%, 100% { opacity: 0; }
-     }
-     .cursor {
-         display: inline-block;
-         width: 2px;
-         height: 1em;
-         background: #667eea;
-         margin-left: 2px;
-         animation: blink 0.7s infinite;
-     }
-     </style>
-     """
- 
-     # Status message
-     status_emoji = "✅" if is_complete else "🔄"
-     status_text = "Complete" if is_complete else "Generating..."
- 
-     # Stats card
-     html = f"""
-     {cursor_style}
-     <div style="font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif; max-width: 100%; margin: 0 auto; background: #f8f9fa; padding: 20px; border-radius: 12px;">
  
-     <!-- Stats Card -->
-     <div style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); padding: 24px; border-radius: 12px; margin-bottom: 20px; color: white; box-shadow: 0 4px 12px rgba(102,126,234,0.3);">
-         <h3 style="margin: 0 0 16px 0; font-size: 17px; font-weight: 600; opacity: 0.95;">{status_emoji} {status_text}</h3>
-         <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(130px, 1fr)); gap: 12px; font-size: 13px;">
-             <div style="background: rgba(255,255,255,0.15); padding: 14px; border-radius: 8px; backdrop-filter: blur(10px);">
-                 <div style="opacity: 0.85; font-size: 11px; margin-bottom: 6px; text-transform: uppercase; letter-spacing: 0.5px;">Time</div>
-                 <div style="font-size: 22px; font-weight: 700;">{generation_time:.1f}s</div>
-             </div>
-             <div style="background: rgba(255,255,255,0.15); padding: 14px; border-radius: 8px; backdrop-filter: blur(10px);">
-                 <div style="opacity: 0.85; font-size: 11px; margin-bottom: 6px; text-transform: uppercase; letter-spacing: 0.5px;">Speed</div>
-                 <div style="font-size: 22px; font-weight: 700;">{tokens_per_sec:.1f} t/s</div>
              </div>
-             <div style="background: rgba(255,255,255,0.15); padding: 14px; border-radius: 8px; backdrop-filter: blur(10px);">
-                 <div style="opacity: 0.85; font-size: 11px; margin-bottom: 6px; text-transform: uppercase; letter-spacing: 0.5px;">Tokens</div>
-                 <div style="font-size: 22px; font-weight: 700;">{tokens_generated:,}</div>
-             </div>
-         </div>
-     </div>
-     """
- 
-     # Loop warning (if detected)
-     if loop_detected:
-         html += f"""
-         <div style="{card_base_style} border-left: 4px solid #ffc107; background: #fff3cd;">
-             <div style="color: #856404; font-weight: 600; display: flex; align-items: center; gap: 8px;">
-                 <span style="font-size: 20px;">⚠️</span>
-                 <span>Loop Detected - Generation stopped to prevent repetition</span>
-             </div>
-         </div>
-         """
- 
-     # Thinking section (collapsed if exists)
-     if thinking and len(thinking.strip()) > 0:
-         thinking_escaped = thinking.replace('<', '&lt;').replace('>', '&gt;')
-         cursor_html = '<span class="cursor"></span>' if is_generating and not explanation and not code_blocks else ''
-         html += f"""
-         <details style="{card_base_style} border-left: 4px solid #8b5cf6;">
-             <summary style="cursor: pointer; font-weight: 600; font-size: 16px; color: #7c3aed; user-select: none; display: flex; align-items: center; gap: 10px; padding: 4px 0;">
-                 <span style="font-size: 20px;">🧠</span>
-                 <span>Reasoning Process</span>
-                 <span style="margin-left: auto; font-size: 13px; color: #8b5cf6; font-weight: normal;">~{int(thinking_tokens_est):,} tokens • Click to expand ▼</span>
-             </summary>
-             <div style="margin-top: 20px; padding-top: 20px; border-top: 2px solid #e9ecef; color: #495057; line-height: 1.8; white-space: pre-wrap; font-size: 14px; font-family: 'SF Mono', Monaco, 'Courier New', monospace;">
-                 {thinking_escaped}{cursor_html}
-             </div>
-         </details>
-         """
- 
-     # Explanation section
-     if explanation and len(explanation.strip()) > 10:
-         explanation_escaped = explanation.replace('<', '&lt;').replace('>', '&gt;')
-         cursor_html = '<span class="cursor"></span>' if is_generating and not code_blocks else ''
-         html += f"""
-         <div style="{card_base_style} border-left: 4px solid #10b981;">
-             <h3 style="margin: 0 0 16px 0; color: #10b981; font-size: 18px; font-weight: 600; display: flex; align-items: center; gap: 10px;">
-                 <span style="font-size: 22px;">💡</span>
-                 <span>Solution Explanation</span>
-             </h3>
-             <div style="color: #495057; line-height: 1.8; font-size: 15px; white-space: pre-wrap;">
-                 {explanation_escaped}{cursor_html}
              </div>
-         </div>
      """
- 
-     # Code blocks
-     if code_blocks and len(code_blocks) > 0:
-         code_blocks_html = ""
-         for idx, (lang, code) in enumerate(code_blocks):
-             lang_display = lang if lang else "code"
-             code_id = f"code_{idx}_{int(time.time()*1000)}"
-             code_clean = code.strip()
- 
-             # Add cursor to last code block if generating
-             cursor_html = '<span class="cursor"></span>' if is_generating and idx == len(code_blocks) - 1 else ''
- 
-             code_blocks_html += f"""
-             <div style="margin-bottom: 16px; background: #1e1e1e; border-radius: 10px; overflow: hidden; box-shadow: 0 2px 8px rgba(0,0,0,0.15);">
-                 <div style="background: #2d2d2d; padding: 12px 20px; color: #e0e0e0; font-weight: 600; font-size: 13px; display: flex; justify-content: space-between; align-items: center;">
-                     <span style="display: flex; align-items: center; gap: 8px;">
-                         <span style="font-size: 16px;">💻</span>
-                         <span>{lang_display}</span>
-                     </span>
-                     <div style="display: flex; gap: 8px;">
-                         <button onclick="navigator.clipboard.writeText(document.getElementById('{code_id}').textContent.replace('▌', '')); this.textContent='✓ Copied'; setTimeout(() => this.textContent='📋 Copy', 2000)"
-                                 style="background: #28a745; color: white; border: none; padding: 7px 16px; border-radius: 6px; cursor: pointer; font-size: 12px; font-weight: 500; transition: all 0.2s;"
-                                 onmouseover="if(this.textContent==='📋 Copy') this.style.background='#218838'"
-                                 onmouseout="if(this.textContent==='📋 Copy') this.style.background='#28a745'">
-                             📋 Copy
-                         </button>
-                         <button onclick="downloadCode(document.getElementById('{code_id}').textContent.replace('▌', ''), '{lang_display}')"
-                                 style="background: #007bff; color: white; border: none; padding: 7px 16px; border-radius: 6px; cursor: pointer; font-size: 12px; font-weight: 500; transition: all 0.2s;"
-                                 onmouseover="this.style.background='#0056b3'"
-                                 onmouseout="this.style.background='#007bff'">
-                             💾 Download
-                         </button>
-                     </div>
                  </div>
-                 <pre style="margin: 0; padding: 20px; color: #d4d4d4; overflow-x: auto; font-family: 'SF Mono', Monaco, 'Courier New', monospace; font-size: 14px; line-height: 1.6; background: #1e1e1e;"><code id="{code_id}">{code_clean}{cursor_html}</code></pre>
-             </div>
          """
- 
-         html += f"""
-         <div style="{card_base_style} border-left: 4px solid #6b7280;">
-             <h3 style="margin: 0 0 20px 0; color: #6b7280; font-size: 18px; font-weight: 600; display: flex; align-items: center; gap: 10px;">
-                 <span style="font-size: 22px;">💻</span>
-                 <span>Implementation</span>
-             </h3>
-             {code_blocks_html}
-         </div>
- 
-         <script>
-         function downloadCode(code, lang) {{
-             const extensions = {{
-                 'python': 'py', 'javascript': 'js', 'java': 'java',
-                 'cpp': 'cpp', 'c': 'c', 'html': 'html', 'css': 'css',
-                 'typescript': 'ts', 'rust': 'rs', 'go': 'go',
-             }};
-             const ext = extensions[lang.toLowerCase()] || 'txt';
-             const filename = `solution.${{ext}}`;
  
-             const blob = new Blob([code], {{ type: 'text/plain' }});
-             const url = window.URL.createObjectURL(blob);
-             const a = document.createElement('a');
-             a.href = url;
-             a.download = filename;
-             document.body.appendChild(a);
-             a.click();
-             document.body.removeChild(a);
-             window.URL.revokeObjectURL(url);
-         }}
-         </script>
      """
- 
-     html += "</div>"
-     return html
  
- def generate_solution_streaming(prompt, temperature=0.6, max_tokens=8192, max_thinking_tokens=4096):
-     """Generate solution with streaming"""
-     if not prompt.strip():
-         yield "<p style='color: #dc3545; font-size: 16px; padding: 20px;'>⚠️ Please enter a problem to solve.</p>"
-         return
- 
-     # Reset parser
-     parser.reset()
  
-     # Stream generation
-     for generated_text, stats, loop_detected in vibe_model.generate_response_streaming(
-         prompt,
-         temperature=temperature,
-         max_new_tokens=max_tokens,
-         max_thinking_tokens=max_thinking_tokens
-     ):
          if stats:
-             is_generating = not stats.get('is_complete', False)
-             html_output = format_streaming_html(generated_text, stats, loop_detected, is_generating)
-             yield html_output
          else:
-             yield f"<p style='color: #dc3545;'>Error: {generated_text}</p>"
  
- # Create Gradio interface
  with gr.Blocks(
-     theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="purple"),
-     css=".gradio-container { max-width: 1400px !important; }"
  ) as demo:
-     gr.Markdown("""
-     # 🧠 VibeThinker-1.5B Competitive Coding Assistant
- 
-     **Optimized for**: Competitive programming (LeetCode, Codeforces, AtCoder) and algorithm challenges
- 
-     🎯 **Best for**: Python algorithmic problems with clear input/output specifications
- 
-     ⚠️ **Note**: This model is specialized for competitive programming, not general software development
  
-     ✨ **Features**: Real-time streaming with intelligent section parsing and automatic loop detection
      """)
  
      with gr.Row():
-         with gr.Column(scale=1):
-             prompt_input = gr.Textbox(
-                 label="💭 Your Coding Problem",
-                 placeholder="Example: Write a Python function to find the longest palindromic substring in a given string. Include test cases.",
-                 lines=8
              )
  
-             with gr.Accordion("⚙️ Advanced Settings", open=False):
-                 temperature_slider = gr.Slider(
-                     minimum=0.0, maximum=1.0, value=0.6, step=0.1,
-                     label="🌡️ Temperature (0.6 recommended)"
-                 )
-                 max_tokens_slider = gr.Slider(
-                     minimum=1024, maximum=16384, value=8192, step=1024,
-                     label="📝 Max New Tokens"
-                 )
-                 max_thinking_slider = gr.Slider(
-                     minimum=512, maximum=8192, value=3072, step=512,
-                     label="🧠 Max Thinking Tokens (hint for prompt)"
-                 )
- 
-                 gr.Markdown("""
-                 **Tips:**
-                 - Lower thinking tokens (1024-2048) for faster, direct solutions
-                 - Higher thinking tokens (4096-8192) for complex reasoning
-                 - Temperature 0.6 balances creativity and accuracy
- 
-                 **Real-time Features:**
-                 - 🔄 Live token-by-token streaming
-                 - 🧠 Intelligent section parsing (thinking/explanation/code)
-                 - ⚠️ Automatic loop detection (stops if repetitive patterns detected)
-                 - ⚡ Blinking cursors on actively streaming sections
-                 - 📊 Live statistics (time, speed, tokens)
- 
-                 **Loop Detection:**
-                 - Monitors for 3-9 word phrases repeated 5+ times
-                 - Automatically stops generation to save tokens
-                 - Truncates at loop start position
-                 """)
  
-             generate_btn = gr.Button("🚀 Generate Solution", variant="primary", size="lg")
-             clear_btn = gr.Button("🗑️ Clear", size="sm")
- 
          with gr.Column(scale=2):
-             output_html = gr.HTML(label="Solution")
- 
-     generate_btn.click(
-         fn=generate_solution_streaming,
-         inputs=[prompt_input, temperature_slider, max_tokens_slider, max_thinking_slider],
-         outputs=output_html
-     )
- 
-     clear_btn.click(
-         fn=lambda: ("", ""),
-         outputs=[prompt_input, output_html]
-     )
  
      gr.Examples(
          examples=[
-             ["Write a Python function to find the maximum sum of a contiguous subarray (Kadane's Algorithm). Include edge cases and test with array [-2,1,-3,4,-1,2,1,-5,4]"],
-             ["Implement a function to detect if a linked list has a cycle. Explain your approach and provide the solution."],
-             ["Given an array of integers and a target sum, find two numbers that add up to the target. Optimize for time complexity."],
-             ["Create a single page HTML application that lets the user choose a color and generates a matching color palette."],
          ],
-         inputs=prompt_input
      )
  
  if __name__ == "__main__":
 
  import gradio as gr
  import torch
  from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
+ from threading import Thread, Event
  import re
  import time
+ import html
+ 
+ # --- Configuration ---
+ MODEL_ID = "WeiboAI/VibeThinker-1.5B"
  
  class VibeThinkerModel:
      def __init__(self):
          self.model = None
          self.tokenizer = None
          self.device = "cuda" if torch.cuda.is_available() else "cpu"
+         self.stop_signal = Event()
+ 
      def load_model(self):
+         if self.model is not None: return
+         print(f"🔄 Loading {MODEL_ID}...")
          try:
+             self.tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
              self.model = AutoModelForCausalLM.from_pretrained(
+                 MODEL_ID,
+                 torch_dtype=torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16,
                  device_map="auto",
+                 trust_remote_code=True,
+                 low_cpu_mem_usage=True
              )
+             print("✅ Model loaded.")
+         except Exception as e:
+             raise e
+ 
+     def stop_generation(self):
+         self.stop_signal.set()
+ 
+     def _detect_tail_loop(self, text, min_phrase_len=3, max_phrase_len=10, threshold=20):
+         """
+         Detects if the generator has gotten stuck in a loop at the END of the text.
+         Criteria: a phrase of 3-10 words repeated at least 20 times consecutively.
+         """
+         words = text.split()
+         total_words = len(words)
+ 
+         # We need at least (min_phrase * threshold) words to even check
+         if total_words < min_phrase_len * threshold:
+             return False
+ 
+         # Only check the end of the string (optimization):
+         # we look at the last (max_phrase * threshold) words
+         check_window = max_phrase_len * threshold
+         recent_words = words[-check_window:] if total_words > check_window else words
+ 
+         for phrase_len in range(min_phrase_len, max_phrase_len + 1):
+             # The candidate phrase is the very last 'phrase_len' words
+             candidate_phrase = recent_words[-phrase_len:]
  
+             # Construct what the tail SHOULD look like if it's looping,
+             # e.g. if the phrase is "and then", we expect "and then and then..."
+             # We check if the tail of the text matches (phrase * threshold)
  
+             required_len = phrase_len * threshold
+             if len(recent_words) < required_len:
+                 continue
+ 
+             segment_to_check = recent_words[-required_len:]
+ 
+             # Efficient check: does the segment consist ONLY of the candidate phrase?
+             # We compare the segment against the candidate phrase repeated
+             expected_segment = candidate_phrase * threshold
+ 
+             if segment_to_check == expected_segment:
+                 return True
+ 
+         return False
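In effect, the check tiles the last `phrase_len` words `threshold` times and compares that against the tail of the word list. A standalone sketch of the same logic for quick verification (names here are illustrative, not from the commit):

    def has_tail_loop(text, min_len=3, max_len=10, threshold=20):
        # True when some 3-10 word phrase tiles the tail `threshold` times
        words = text.split()
        for n in range(min_len, max_len + 1):
            need = n * threshold
            if len(words) >= need and words[-need:] == words[-n:] * threshold:
                return True
        return False

    assert has_tail_loop("Plan: " + "Wait, let me reconsider. " * 25)
    assert not has_tail_loop("def solve(nums): return max(nums)")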
 
+     def generate_response_streaming(self, prompt, temperature=0.6, max_new_tokens=32000):
+         if not self.model: self.load_model()
+         self.stop_signal.clear()
  
          try:
              start_time = time.time()
  
+             # Optimized Prompt for VibeThinker
+             messages = [
+                 {"role": "system", "content": "You are an expert algorithm engineer. Analyze the problem deeply, then provide a clean Python solution."},
+                 {"role": "user", "content": prompt}
+             ]
+             text_input = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+             inputs = self.tokenizer(text_input, return_tensors="pt").to(self.device)
  
+             streamer = TextIteratorStreamer(self.tokenizer, skip_prompt=True, skip_special_tokens=True)
  
              generation_kwargs = dict(
                  **inputs,
                  max_new_tokens=max_new_tokens,
  
                  top_p=0.95,
                  top_k=50,
                  do_sample=True,
                  pad_token_id=self.tokenizer.eos_token_id,
                  streamer=streamer,
              )
  
              thread = Thread(target=self.model.generate, kwargs=generation_kwargs)
              thread.start()
  
              generated_text = ""
              loop_detected = False
  
+             # Token counter for loop check frequency
+             token_count = 0
+ 
              for new_text in streamer:
+                 if self.stop_signal.is_set(): break
  
                  generated_text += new_text
+                 token_count += 1
  
+                 # Check for loops every 10 tokens to save CPU
+                 if token_count % 10 == 0:
+                     if self._detect_tail_loop(generated_text):
                          loop_detected = True
+                         self.stop_signal.set()  # Signal stop (ends streaming; generate() itself keeps running in the background thread)
+                         # Optional: truncate the repetitive garbage
+                         # (simple truncation for UI cleanliness)
+                         generated_text = generated_text + "\n\n[⚠️ Generation stopped: Infinite loop detected]"
+                         break
+ 
                  yield generated_text, {
+                     "time": time.time() - start_time,
+                     "tokens": len(self.tokenizer.encode(generated_text)),
+                     "generating": True
+                 }
  
+             if not self.stop_signal.is_set():
+                 thread.join()
  
              yield generated_text, {
+                 "time": time.time() - start_time,
+                 "tokens": len(self.tokenizer.encode(generated_text)),
+                 "generating": False
+             }
  
          except Exception as e:
+             yield f"Error: {str(e)}", None
  
  vibe_model = VibeThinkerModel()
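The hand-built `<|im_start|>` prompt from the old version is now delegated to the tokenizer's chat template. For a ChatML-style template like the one the old code mimicked, the rendered string comes out roughly as below (exact special tokens depend on the model's bundled template; the user message is invented for illustration):

    messages = [
        {"role": "system", "content": "You are an expert algorithm engineer..."},
        {"role": "user", "content": "Find two numbers that sum to a target."},
    ]
    # tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    # ≈ "<|im_start|>system\nYou are an expert algorithm engineer...<|im_end|>\n"
    #   "<|im_start|>user\nFind two numbers that sum to a target.<|im_end|>\n"
    #   "<|im_start|>assistant\n"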
  
+ class ModernUIParser:
+     """Parses text into a structured, modern UI"""
  
+     def format_code(self, code, lang="python"):
+         """Applies basic HTML syntax highlighting regex"""
+         code = html.escape(code)
+         # Comments
+         code = re.sub(r'(#.*?)(?=\n|$)', r'<span class="c">\1</span>', code)
+         # Keywords
+         keywords = r'\b(def|class|return|if|else|elif|for|while|import|from|try|except|with|as|pass|None|True|False)\b'
+         code = re.sub(keywords, r'<span class="k">\1</span>', code)
+         # Builtins/Calls
+         code = re.sub(r'\b(print|len|range|enumerate|zip|super|__init__)\b', r'<span class="nf">\1</span>', code)
+         # Strings
+         code = re.sub(r'(&quot;.*?&quot;)', r'<span class="s">\1</span>', code)
+         code = re.sub(r"('.*?')", r'<span class="s">\1</span>', code)
+         return code
+ 
+     def parse_and_render(self, text, stats):
+         # 1. Separate Thinking from Content
+         # Heuristic: content before the first code block or explicit "Solution" header is usually thinking
+         thinking = ""
+         solution = text
  
+         # Find split point
+         markers = ["```", "Here is the solution", "### Solution", "Implementation:"]
+         first_marker_idx = len(text)
+         for m in markers:
+             idx = text.find(m)
+             if idx != -1 and idx < first_marker_idx:
+                 first_marker_idx = idx
  
+         if first_marker_idx < len(text) and first_marker_idx > 50:
+             thinking = text[:first_marker_idx].strip()
+             solution = text[first_marker_idx:].strip()
  
+         # 2. Process Solution Text (Markdown-ish to HTML)
+         # Handle code blocks
+         parts = re.split(r'(```\w*\n.*?```)', solution, flags=re.DOTALL)
+         solution_html = ""
  
+         for part in parts:
+             if part.startswith('```'):
+                 # Extract lang and code
+                 match = re.match(r'```(\w*)\n(.*?)```', part, re.DOTALL)
+                 if match:
+                     lang = match.group(1) or "text"
+                     code_content = match.group(2)
+                     highlighted = self.format_code(code_content, lang)
+                     solution_html += f"""
+                     <div class="code-block">
+                         <div class="code-header">
+                             <span class="lang-tag">{lang}</span>
+                             <span class="copy-btn" onclick="navigator.clipboard.writeText(this.parentElement.nextElementSibling.innerText)">Copy</span>
+                         </div>
+                         <pre>{highlighted}</pre>
+                     </div>"""
+                 else:
+                     solution_html += f"<pre>{html.escape(part)}</pre>"
+             else:
+                 # Normal text processing
+                 clean_text = html.escape(part)
+                 # Headers
+                 clean_text = re.sub(r'^### (.*?)$', r'<h3>\1</h3>', clean_text, flags=re.M)
+                 clean_text = re.sub(r'^## (.*?)$', r'<h2>\1</h2>', clean_text, flags=re.M)
+                 clean_text = re.sub(r'\*\*(.*?)\*\*', r'<strong>\1</strong>', clean_text)
+                 # Line breaks
+                 clean_text = clean_text.replace('\n', '<br>')
+                 solution_html += f"<div class='text-content'>{clean_text}</div>"
+ 
+         # 3. Process Thinking
+         thinking_html = html.escape(thinking).replace('\n', '<br>')
  
+         # 4. Stats & Cursor
+         is_gen = stats['generating'] if stats else False
+         t_sec = stats['tokens'] / stats['time'] if stats and stats['time'] > 0 else 0
+         cursor = '<span class="cursor"></span>' if is_gen else ''
  
+         # CSS Styles (Modern Dark Theme)
+         css = """
+         <style>
+             :root { --bg: #0f1117; --card: #1e293b; --accent: #6366f1; --text: #e2e8f0; --dim: #94a3b8; }
+             .ui-container { font-family: 'Inter', system-ui, sans-serif; color: var(--text); line-height: 1.6; }
+ 
+             /* Stats Bar */
+             .stats-bar { display: flex; gap: 15px; margin-bottom: 20px; font-size: 12px; text-transform: uppercase; letter-spacing: 1px; }
+             .stat-pill { background: #334155; padding: 4px 10px; border-radius: 20px; color: #cbd5e1; display: flex; align-items: center; gap: 6px; }
+             .stat-active { border: 1px solid var(--accent); color: var(--accent); background: rgba(99, 102, 241, 0.1); }
+ 
+             /* Thinking Section */
+             details.thinking-box { margin-bottom: 20px; border: 1px solid #312e81; border-radius: 8px; background: rgba(49, 46, 129, 0.1); overflow: hidden; }
+             details.thinking-box summary { padding: 12px 16px; cursor: pointer; font-weight: 600; color: #818cf8; list-style: none; outline: none; user-select: none; }
+             details.thinking-box summary::marker { display: none; }
+             details.thinking-box summary:hover { background: rgba(49, 46, 129, 0.2); }
+             .thought-content { padding: 16px; font-family: 'JetBrains Mono', monospace; font-size: 13px; color: #a5b4fc; border-top: 1px solid #312e81; }
+ 
+             /* Solution Section */
+             .solution-box { background: var(--bg); padding: 10px 0; }
+             .text-content { margin-bottom: 10px; }
+             h2, h3 { color: white; margin-top: 20px; margin-bottom: 10px; font-weight: 600; }
+             strong { color: #fff; font-weight: 700; }
+ 
+             /* Code Blocks */
+             .code-block { background: #0d1117; border: 1px solid #30363d; border-radius: 8px; margin: 15px 0; overflow: hidden; }
+             .code-header { background: #161b22; padding: 6px 12px; display: flex; justify-content: space-between; align-items: center; border-bottom: 1px solid #30363d; }
+             .lang-tag { font-size: 11px; color: #8b949e; text-transform: uppercase; font-weight: bold; }
+             .copy-btn { font-size: 11px; cursor: pointer; color: #58a6ff; }
+             .copy-btn:hover { text-decoration: underline; }
+             pre { margin: 0; padding: 16px; overflow-x: auto; font-family: 'Fira Code', 'Consolas', monospace; font-size: 14px; color: #c9d1d9; }
+ 
+             /* Syntax Highlighting Colors */
+             .k { color: #ff7b72; }  /* Keyword */
+             .s { color: #a5d6ff; }  /* String */
+             .c { color: #8b949e; font-style: italic; }  /* Comment */
+             .nf { color: #d2a8ff; }  /* Function */
+ 
+             /* Cursor Animation */
+             .cursor { display: inline-block; width: 8px; height: 18px; background: var(--accent); vertical-align: text-bottom; animation: blink 1s step-end infinite; margin-left: 2px; }
+             @keyframes blink { 0%, 100% { opacity: 1; } 50% { opacity: 0; } }
+         </style>
+         """
  
+         html_out = f"""{css}
+         <div class="ui-container">
+             <div class="stats-bar">
+                 <div class="stat-pill {'stat-active' if is_gen else ''}">
+                     {'🟢 GENERATING' if is_gen else '⚪ COMPLETE'}
                  </div>
+                 <div class="stat-pill">⏱️ {stats['time']:.1f}s</div>
+                 <div class="stat-pill">⚡ {t_sec:.1f} T/s</div>
+                 <div class="stat-pill">📝 {stats['tokens']} Tok</div>
              </div>
      """
+ 
+         if thinking:
+             # Open by default if generating, closed if done
+             is_open = "open" if is_gen else ""
+             html_out += f"""
+             <details class="thinking-box" {is_open}>
+                 <summary>🧠 Chain of Thought (Process)</summary>
+                 <div class="thought-content">
+                     {thinking_html} {cursor if not solution else ''}
                  </div>
+             </details>
      """
  
+         html_out += f"""
+         <div class="solution-box">
+             {solution_html} {cursor if solution or not thinking else ''}
+         </div>
+         </div>
      """
+ 
+         return html_out
  
+ parser = ModernUIParser()
+ 
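Because the splitting pattern above is wrapped in a capture group, `re.split` keeps the fenced blocks in the result, so prose and code alternate in `parts`. A quick illustration with invented sample text:

    import re

    sample = "Use two pointers.\n```python\nprint('hi')\n```\nDone."
    parts = re.split(r'(```\w*\n.*?```)', sample, flags=re.DOTALL)
    # parts == ['Use two pointers.\n', "```python\nprint('hi')\n```", '\nDone.']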
+ def run_gen(prompt, temp, max_tokens):
+     if not prompt:
+         # yield rather than return: run_gen is a generator, so a returned
+         # string would never reach the UI
+         yield "Please enter a prompt."
+         return
  
+     gen = vibe_model.generate_response_streaming(prompt, temp, max_tokens)
+     for text, stats in gen:
          if stats:
+             yield parser.parse_and_render(text, stats)
          else:
+             yield f"<div style='color:red'>Error: {text}</div>"
+ 
+ def stop_action():
+     vibe_model.stop_generation()
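One caveat: `stop_signal` only breaks the streaming loops, so the background `model.generate()` thread still runs to completion after a stop. If hard cancellation were wanted, the same Event could be handed to transformers' stopping-criteria hook; a sketch of that option (not something this commit adds):

    from transformers import StoppingCriteria, StoppingCriteriaList

    class StopOnEvent(StoppingCriteria):
        """Makes generate() itself poll the shared Event each step (illustrative helper)."""
        def __init__(self, event):
            self.event = event

        def __call__(self, input_ids, scores, **kwargs):
            return self.event.is_set()

    # e.g. inside generate_response_streaming:
    # generation_kwargs["stopping_criteria"] = StoppingCriteriaList([StopOnEvent(self.stop_signal)])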
  
+ # --- GRADIO INTERFACE ---
  with gr.Blocks(
+     title="VibeThinker IDE",
+     theme=gr.themes.Base(
+         primary_hue="indigo",
+         neutral_hue="slate",
+         font=("Inter", "sans-serif")
+     ),
+     css=".gradio-container { background-color: #0f1117 !important; border: none; }"
  ) as demo:
  
+     gr.Markdown("""
+     <div style="text-align: center; margin-bottom: 20px;">
+         <h1 style="color: white; font-size: 2rem;">⚡ VibeThinker IDE</h1>
+         <p style="color: #94a3b8;">Specialized 1.5B Model for Algorithms & Competitive Coding</p>
+     </div>
      """)
  
      with gr.Row():
+         # Left Column: Inputs
+         with gr.Column(scale=1, variant="panel"):
+             input_text = gr.Textbox(
+                 label="Problem Statement",
+                 lines=8,
+                 placeholder="Paste a LeetCode problem or ask for a specific algorithm...",
+                 elem_id="input-box"
              )
  
+             with gr.Accordion("Settings", open=False):
+                 temp = gr.Slider(0.1, 1.0, value=0.6, label="Temperature")
+                 tokens = gr.Slider(1024, 32000, value=8192, label="Max Tokens")
  
+             with gr.Row():
+                 btn_run = gr.Button(" Run", variant="primary", scale=2)
+                 btn_stop = gr.Button("⏹ Stop", variant="stop", scale=1)
+ 
+         # Right Column: Output
          with gr.Column(scale=2):
+             out_html = gr.HTML(label="Result Console")
  
+     btn_run.click(run_gen, inputs=[input_text, temp, tokens], outputs=out_html)
+     btn_stop.click(stop_action, None, None)
+ 
      gr.Examples(
          examples=[
+             ["Determine if a Sudoku board is valid. Provide a Python solution with O(1) space complexity if possible."],
+             ["Explain the Knuth-Morris-Pratt (KMP) algorithm and implement it in Python."],
+             ["Solve the 'Trapping Rain Water' problem using the two-pointer approach."],
          ],
+         inputs=input_text
      )
  
  if __name__ == "__main__":