import gradio as gr
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TextIteratorStreamer,
    StoppingCriteria,
    StoppingCriteriaList,
)
from threading import Thread, Event
import re
import time
import html

# --- Configuration ---
MODEL_ID = "WeiboAI/VibeThinker-1.5B"


class StopOnEvent(StoppingCriteria):
    """Halts generate() when the shared stop event is set.

    Without this, pressing Stop (or detecting a loop) would only break the
    UI loop while the generation thread kept running in the background.
    """

    def __init__(self, event: Event):
        super().__init__()
        self.event = event

    def __call__(self, input_ids, scores, **kwargs):
        return self.event.is_set()


class VibeThinkerModel:
    def __init__(self):
        self.model = None
        self.tokenizer = None
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.stop_signal = Event()

    def load_model(self):
        if self.model is not None:
            return
        print(f"🔄 Loading {MODEL_ID}...")
        self.tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
        self.model = AutoModelForCausalLM.from_pretrained(
            MODEL_ID,
            torch_dtype=torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16,
            device_map="auto",
            trust_remote_code=True,
            low_cpu_mem_usage=True,
        )
        print("✅ Model loaded.")

    def stop_generation(self):
        self.stop_signal.set()

    def _detect_tail_loop(self, text, min_phrase_len=3, max_phrase_len=10, threshold=20):
        """
        Detects whether the generator is stuck in a loop at the END of the text.

        Criterion: a phrase of 3-10 words repeated at least 20 times consecutively.
        """
        words = text.split()
        total_words = len(words)

        # We need at least (min_phrase_len * threshold) words to even check.
        if total_words < min_phrase_len * threshold:
            return False

        # Only inspect the end of the string (optimization): the last
        # (max_phrase_len * threshold) words.
        check_window = max_phrase_len * threshold
        recent_words = words[-check_window:] if total_words > check_window else words

        for phrase_len in range(min_phrase_len, max_phrase_len + 1):
            # The candidate phrase is the very last 'phrase_len' words.
            candidate_phrase = recent_words[-phrase_len:]

            # If the text is looping, its tail must consist of the candidate
            # phrase repeated back-to-back, e.g. "and then and then and then".
            required_len = phrase_len * threshold
            if len(recent_words) < required_len:
                continue

            segment_to_check = recent_words[-required_len:]
            expected_segment = candidate_phrase * threshold
            if segment_to_check == expected_segment:
                return True
        return False
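
    # Illustrative sanity check (not part of the app): the detector above can
    # be exercised without loading any model weights, since __init__ only sets
    # up state. With the default thresholds (3-10 word phrases, 20 consecutive
    # repeats), a text ending in "and then" repeated 60 times trips the
    # four-word-phrase check:
    #
    #   >>> VibeThinkerModel()._detect_tail_loop("and then " * 60)
    #   True
    #   >>> VibeThinkerModel()._detect_tail_loop("no repetition in this text")
    #   False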

    def generate_response_streaming(self, prompt, temperature=0.6, max_new_tokens=32000):
        if not self.model:
            self.load_model()
        self.stop_signal.clear()

        try:
            start_time = time.time()

            # Prompt tuned for VibeThinker
            messages = [
                {
                    "role": "system",
                    "content": (
                        "You are an expert algorithm engineer. "
                        "Analyze the problem deeply, then provide a clean Python solution."
                    ),
                },
                {"role": "user", "content": prompt},
            ]
            text_input = self.tokenizer.apply_chat_template(
                messages, tokenize=False, add_generation_prompt=True
            )
            inputs = self.tokenizer(text_input, return_tensors="pt").to(self.device)
            streamer = TextIteratorStreamer(
                self.tokenizer, skip_prompt=True, skip_special_tokens=True
            )

            generation_kwargs = dict(
                **inputs,
                max_new_tokens=max_new_tokens,
                temperature=temperature,
                top_p=0.95,
                top_k=50,
                do_sample=True,
                pad_token_id=self.tokenizer.eos_token_id,
                streamer=streamer,
                # Lets the stop button and the loop detector actually halt
                # generate() instead of only abandoning the stream.
                stopping_criteria=StoppingCriteriaList([StopOnEvent(self.stop_signal)]),
            )

            thread = Thread(target=self.model.generate, kwargs=generation_kwargs)
            thread.start()

            generated_text = ""
            chunk_count = 0  # the streamer yields text chunks, not single tokens

            for new_text in streamer:
                if self.stop_signal.is_set():
                    break
                generated_text += new_text
                chunk_count += 1

                # Check for loops every 10 chunks to save CPU.
                if chunk_count % 10 == 0 and self._detect_tail_loop(generated_text):
                    self.stop_signal.set()  # halts generate() via StopOnEvent
                    # Flag the repetitive tail for the UI rather than hiding it.
                    generated_text += "\n\n[⚠️ Generation stopped: infinite loop detected]"
                    break

                yield generated_text, {
                    "time": time.time() - start_time,
                    "tokens": len(self.tokenizer.encode(generated_text)),
                    "generating": True,
                }

            thread.join()
            yield generated_text, {
                "time": time.time() - start_time,
                "tokens": len(self.tokenizer.encode(generated_text)),
                "generating": False,
            }
        except Exception as e:
            yield f"Error: {str(e)}", None


vibe_model = VibeThinkerModel()


class ModernUIParser:
    """Parses model output into a structured, modern HTML view."""

    def format_code(self, code, lang="python"):
        """Applies basic regex-based syntax highlighting (a heuristic, not a lexer)."""
        code = html.escape(code)
        # Keywords go first: the markup inserted below contains the word
        # 'class', which this pattern would otherwise mangle. Span attributes
        # are left unquoted so the string patterns cannot match the markup.
        keywords = r'\b(def|class|return|if|else|elif|for|while|import|from|try|except|with|as|pass|None|True|False)\b'
        code = re.sub(keywords, r'<span class=kw>\1</span>', code)
        # Builtins / common calls
        code = re.sub(r'\b(print|len|range|enumerate|zip|super|__init__)\b', r'<span class=fn>\1</span>', code)
        # Comments; the lookbehind skips the '#' inside entities like &#x27;
        code = re.sub(r'(?<!&)(#.*?)(?=\n|$)', r'<span class=cmt>\1</span>', code)
        # Strings: html.escape has already turned quotes into &quot; / &#x27;
        code = re.sub(r'(&quot;.*?&quot;)', r'<span class=str>\1</span>', code)
        code = re.sub(r'(&#x27;.*?&#x27;)', r'<span class=str>\1</span>', code)
        return code

    def parse_and_render(self, text, stats):
        # 1. Separate thinking from content.
        # Heuristic: text before the first code block or an explicit
        # "Solution" header is usually the chain of thought.
        thinking = ""
        solution = text

        # Find the split point
        markers = ["```", "Here is the solution", "### Solution", "Implementation:"]
        first_marker_idx = len(text)
        for m in markers:
            idx = text.find(m)
            if idx != -1 and idx < first_marker_idx:
                first_marker_idx = idx

        if 50 < first_marker_idx < len(text):
            thinking = text[:first_marker_idx].strip()
            solution = text[first_marker_idx:].strip()

        # 2. Process solution text (markdown-ish to HTML), code blocks first.
        parts = re.split(r'(```\w*\n.*?```)', solution, flags=re.DOTALL)
        solution_html = ""
        for part in parts:
            if part.startswith('```'):
                # Extract lang and code
                match = re.match(r'```(\w*)\n(.*?)```', part, re.DOTALL)
                if match:
                    lang = match.group(1) or "text"
                    code_content = match.group(2)
                    highlighted = self.format_code(code_content, lang)
                    solution_html += (
                        f"<div class=code-block>"
                        f"<div class=code-header><span>{lang}</span>"
                        f"<span class=copy-label>Copy</span></div>"  # decorative label, no JS attached
                        f"<pre><code>{highlighted}</code></pre>"
                        f"</div>"
                    )
                else:
                    # Malformed fence: show it verbatim.
                    solution_html += f"<pre>{html.escape(part)}</pre>"
            else:
                # Normal text processing
                clean_text = html.escape(part)
                # Headers
                clean_text = re.sub(r'^### (.*?)$', r'<h3>\1</h3>', clean_text, flags=re.M)
                clean_text = re.sub(r'^## (.*?)$', r'<h2>\1</h2>', clean_text, flags=re.M)
                # Bold
                clean_text = re.sub(r'\*\*(.*?)\*\*', r'<b>\1</b>', clean_text)
                # Line breaks
                clean_text = clean_text.replace('\n', '<br>')
                solution_html += f"<div class=prose>{clean_text}</div>"

        # 3. Process thinking
        thinking_html = html.escape(thinking).replace('\n', '<br>')

        # 4. Stats & cursor
        is_gen = stats['generating'] if stats else False
        t_sec = stats['tokens'] / stats['time'] if stats and stats['time'] > 0 else 0
        cursor = '<span class=cursor>▌</span>' if is_gen else ''

        # CSS styles (modern dark theme)
        css = """<style>
        .console { font-family: Inter, sans-serif; background: #0f1117; color: #e2e8f0; padding: 16px; border-radius: 12px; }
        .status-bar { display: flex; gap: 16px; padding: 8px 12px; background: #1a1d27; border-radius: 8px; font-size: 13px; margin-bottom: 12px; }
        .code-block { background: #161922; border: 1px solid #2a2f3d; border-radius: 8px; margin: 12px 0; overflow: hidden; }
        .code-header { display: flex; justify-content: space-between; padding: 6px 12px; background: #1e2230; color: #94a3b8; font-size: 12px; }
        .code-block pre { margin: 0; padding: 12px; overflow-x: auto; }
        .kw { color: #c084fc; } .fn { color: #60a5fa; } .cmt { color: #6b7280; font-style: italic; } .str { color: #4ade80; }
        .prose { line-height: 1.6; }
        .thinking { background: #14161f; border: 1px solid #262b38; border-radius: 8px; padding: 10px 14px; margin-bottom: 12px; color: #94a3b8; font-size: 14px; }
        .cursor { animation: blink 1s step-end infinite; }
        .error { color: #f87171; }
        @keyframes blink { 50% { opacity: 0; } }
        </style>"""

        html_out = f"""{css}
        <div class=console>
          <div class=status-bar>
            <span>{'🟢 GENERATING' if is_gen else '⚪ COMPLETE'}</span>
            <span>⏱️ {stats['time']:.1f}s</span>
            <span>⚡ {t_sec:.1f} T/s</span>
            <span>📝 {stats['tokens']} Tok</span>
          </div>"""

        if thinking:
            # Open by default while generating, collapsed when done.
            is_open = "open" if is_gen else ""
            html_out += f"""
          <details class=thinking {is_open}>
            <summary>🧠 Chain of Thought (Process)</summary>
            <div>{thinking_html} {cursor if not solution else ''}</div>
          </details>"""

        html_out += f"""
          <div class=solution>{solution_html} {cursor if solution or not thinking else ''}</div>
        </div>"""
        return html_out


parser = ModernUIParser()


def run_gen(prompt, temp, max_tokens):
    if not prompt:
        # run_gen is a generator, so the message must be yielded, not
        # returned, or Gradio never sees it.
        yield "Please enter a prompt."
        return
    gen = vibe_model.generate_response_streaming(prompt, temp, max_tokens)
    for text, stats in gen:
        if stats:
            yield parser.parse_and_render(text, stats)
        else:
            yield f"<div class=error>Error: {html.escape(text)}</div>"


def stop_action():
    vibe_model.stop_generation()


# --- GRADIO INTERFACE ---
with gr.Blocks(
    title="VibeThinker IDE",
    theme=gr.themes.Base(
        primary_hue="indigo",
        neutral_hue="slate",
        font=("Inter", "sans-serif"),
    ),
    css=".gradio-container { background-color: #0f1117 !important; border: none; }",
) as demo:
    gr.Markdown("""
# ⚡ VibeThinker IDE
Specialized 1.5B Model for Algorithms & Competitive Coding
""")

    with gr.Row():
        # Left column: inputs
        with gr.Column(scale=1, variant="panel"):
            input_text = gr.Textbox(
                label="Problem Statement",
                lines=8,
                placeholder="Paste a LeetCode problem or ask for a specific algorithm...",
                elem_id="input-box",
            )
            with gr.Accordion("Settings", open=False):
                temp = gr.Slider(0.1, 1.0, value=0.6, label="Temperature")
                tokens = gr.Slider(1024, 32000, value=8192, label="Max Tokens")
            with gr.Row():
                btn_run = gr.Button("▶ Run", variant="primary", scale=2)
                btn_stop = gr.Button("⏹ Stop", variant="stop", scale=1)

        # Right column: output
        with gr.Column(scale=2):
            out_html = gr.HTML(label="Result Console")

    btn_run.click(run_gen, inputs=[input_text, temp, tokens], outputs=out_html)
    btn_stop.click(stop_action, None, None)

    gr.Examples(
        examples=[
            ["Determine if a Sudoku board is valid. Provide a Python solution with O(1) space complexity if possible."],
            ["Explain the Knuth-Morris-Pratt (KMP) algorithm and implement it in Python."],
            ["Solve the 'Trapping Rain Water' problem using the two-pointer approach."],
        ],
        inputs=input_text,
    )

if __name__ == "__main__":
    demo.launch()