Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -20,19 +20,19 @@ class VibeThinkerVLLM:
|
|
| 20 |
model="WeiboAI/VibeThinker-1.5B",
|
| 21 |
dtype="float16", # Use float16 instead of bfloat16 for T4
|
| 22 |
gpu_memory_utilization=0.85,
|
| 23 |
-
max_model_len=
|
| 24 |
enforce_eager=True, # Disable CUDA graphs for T4
|
| 25 |
disable_custom_all_reduce=True, # Avoid custom kernels
|
| 26 |
enable_prefix_caching=False, # Disable for stability
|
| 27 |
max_num_seqs=1, # Process one sequence at a time
|
| 28 |
trust_remote_code=True
|
| 29 |
)
|
| 30 |
-
print("β
vLLM model loaded successfully with T4-compatible settings")
|
| 31 |
except Exception as e:
|
| 32 |
print(f"β Error loading model: {e}")
|
| 33 |
raise
|
| 34 |
|
| 35 |
-
def generate_response(self, prompt, temperature=0.6, max_tokens=
|
| 36 |
"""
|
| 37 |
Generate response with thinking length control and loop detection
|
| 38 |
|
|
@@ -106,7 +106,6 @@ Keep reasoning under {max_thinking_tokens} tokens. DO NOT repeat yourself.<|im_e
|
|
| 106 |
|
| 107 |
def _detect_loop(self, text):
|
| 108 |
"""Detect if text contains repetitive loops"""
|
| 109 |
-
# Check for repeated phrases
|
| 110 |
words = text.split()
|
| 111 |
if len(words) < 20:
|
| 112 |
return False
|
|
@@ -129,71 +128,84 @@ Keep reasoning under {max_thinking_tokens} tokens. DO NOT repeat yourself.<|im_e
|
|
| 129 |
rest_start = i + length
|
| 130 |
rest = ' '.join(words[rest_start:])
|
| 131 |
if phrase in rest:
|
| 132 |
-
# Truncate at first repetition
|
| 133 |
return ' '.join(words[:rest_start])
|
| 134 |
return text
|
| 135 |
|
| 136 |
def parse_model_output(text):
|
| 137 |
"""
|
| 138 |
Parse model output to separate thinking and final answer
|
|
|
|
| 139 |
Returns: (thinking_content, answer_content, code_blocks)
|
| 140 |
"""
|
| 141 |
-
# Remove common loop warning
|
| 142 |
loop_warning = ""
|
| 143 |
if "[Loop detected and truncated]" in text:
|
| 144 |
loop_warning = "\n\nβ οΈ **Note**: Repetitive content was detected and removed"
|
| 145 |
text = text.replace("β οΈ *[Loop detected and truncated]*", "")
|
| 146 |
|
| 147 |
-
# Try to find thinking
|
| 148 |
thinking_patterns = [
|
| 149 |
r'<think>(.*?)</think>',
|
| 150 |
r'<thinking>(.*?)</thinking>',
|
| 151 |
-
r'(?:Let me think|Let\'s think|Analysis):(.*?)(?=\n\n[SA-Z]|Solution:|Code:|```|\Z)',
|
| 152 |
]
|
| 153 |
|
| 154 |
thinking_content = ""
|
|
|
|
|
|
|
| 155 |
for pattern in thinking_patterns:
|
| 156 |
match = re.search(pattern, text, re.DOTALL | re.IGNORECASE)
|
| 157 |
if match:
|
| 158 |
thinking_content = match.group(1).strip()
|
| 159 |
-
|
| 160 |
break
|
| 161 |
|
| 162 |
-
# If no explicit thinking,
|
|
|
|
| 163 |
if not thinking_content:
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 170 |
|
| 171 |
-
#
|
| 172 |
code_pattern = r'```(\w+)?\n(.*?)```'
|
| 173 |
-
code_blocks = re.findall(code_pattern,
|
| 174 |
|
| 175 |
-
# Extract final answer (boxed or
|
| 176 |
-
answer_match = re.search(r'\\boxed\{([^}]+)\}',
|
| 177 |
if answer_match:
|
| 178 |
-
|
| 179 |
else:
|
| 180 |
-
|
| 181 |
|
| 182 |
-
|
| 183 |
|
| 184 |
-
return thinking_content,
|
| 185 |
|
| 186 |
def format_output_html(thinking, answer, code_blocks, prompt_tokens, completion_tokens, generation_time):
|
| 187 |
"""
|
| 188 |
-
Format output as styled HTML
|
| 189 |
"""
|
| 190 |
total_tokens = prompt_tokens + completion_tokens
|
| 191 |
thinking_tokens_est = len(thinking.split()) * 1.3 if thinking else 0
|
| 192 |
tokens_per_sec = completion_tokens / generation_time if generation_time > 0 else 0
|
| 193 |
|
| 194 |
-
# Build thinking section HTML
|
| 195 |
thinking_html = ""
|
| 196 |
if thinking:
|
|
|
|
|
|
|
| 197 |
thinking_html = f"""
|
| 198 |
<details style="background: #f8f9fa; border: 2px solid #e9ecef; border-radius: 12px; padding: 20px; margin-bottom: 24px; box-shadow: 0 2px 4px rgba(0,0,0,0.05);">
|
| 199 |
<summary style="cursor: pointer; font-weight: 600; font-size: 16px; color: #495057; user-select: none; display: flex; align-items: center; gap: 8px;">
|
|
@@ -202,29 +214,42 @@ def format_output_html(thinking, answer, code_blocks, prompt_tokens, completion_
|
|
| 202 |
<span style="margin-left: auto; font-size: 12px; color: #6c757d;">Click to expand/collapse</span>
|
| 203 |
</summary>
|
| 204 |
<div style="margin-top: 16px; padding-top: 16px; border-top: 1px solid #dee2e6; color: #212529; line-height: 1.7; white-space: pre-wrap; font-size: 14px; font-family: 'SF Mono', Monaco, Consolas, monospace; background: #ffffff; padding: 16px; border-radius: 8px;">
|
| 205 |
-
{
|
| 206 |
</div>
|
| 207 |
</details>
|
| 208 |
"""
|
| 209 |
|
| 210 |
-
# Build code blocks HTML
|
| 211 |
code_html = ""
|
| 212 |
if code_blocks:
|
| 213 |
code_blocks_html = ""
|
| 214 |
-
for lang, code in code_blocks:
|
| 215 |
lang_display = lang if lang else "code"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 216 |
code_blocks_html += f"""
|
| 217 |
<div style="margin-bottom: 16px; background: #1e1e1e; border-radius: 12px; overflow: hidden; box-shadow: 0 4px 6px rgba(0,0,0,0.1);">
|
| 218 |
<div style="background: #2d2d2d; padding: 12px 20px; color: #ffffff; font-weight: 600; font-size: 13px; display: flex; justify-content: space-between; align-items: center; border-bottom: 1px solid #3d3d3d;">
|
| 219 |
<span>{lang_display}</span>
|
| 220 |
-
<
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 226 |
</div>
|
| 227 |
-
<pre style="margin: 0; padding: 20px; color: #d4d4d4; overflow-x: auto; font-family: 'SF Mono', Monaco, Consolas, monospace; font-size: 14px; line-height: 1.6;"><code>{
|
| 228 |
</div>
|
| 229 |
"""
|
| 230 |
|
|
@@ -235,6 +260,35 @@ def format_output_html(thinking, answer, code_blocks, prompt_tokens, completion_
|
|
| 235 |
</h3>
|
| 236 |
{code_blocks_html}
|
| 237 |
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 238 |
"""
|
| 239 |
|
| 240 |
html = f"""
|
|
@@ -271,7 +325,7 @@ def format_output_html(thinking, answer, code_blocks, prompt_tokens, completion_
|
|
| 271 |
</div>
|
| 272 |
</div>
|
| 273 |
|
| 274 |
-
<!-- Thinking Section -->
|
| 275 |
{thinking_html}
|
| 276 |
|
| 277 |
<!-- Answer Section -->
|
|
@@ -284,7 +338,7 @@ def format_output_html(thinking, answer, code_blocks, prompt_tokens, completion_
|
|
| 284 |
</div>
|
| 285 |
</div>
|
| 286 |
|
| 287 |
-
<!-- Code Blocks -->
|
| 288 |
{code_html}
|
| 289 |
|
| 290 |
</div>
|
|
@@ -292,18 +346,16 @@ def format_output_html(thinking, answer, code_blocks, prompt_tokens, completion_
|
|
| 292 |
return html
|
| 293 |
|
| 294 |
# Initialize model
|
| 295 |
-
print("π Initializing VibeThinker with vLLM (T4-optimized)...")
|
| 296 |
vibe_model = VibeThinkerVLLM()
|
| 297 |
|
| 298 |
-
def generate_solution(prompt, temperature=0.6, max_tokens=
|
| 299 |
"""Generate and format solution with progress tracking"""
|
| 300 |
if not prompt.strip():
|
| 301 |
return "<p style='color: #dc3545; font-size: 16px; padding: 20px;'>β οΈ Please enter a problem to solve.</p>"
|
| 302 |
|
| 303 |
-
# Show progress
|
| 304 |
progress(0, desc="π Initializing generation...")
|
| 305 |
|
| 306 |
-
# Generate response
|
| 307 |
progress(0.3, desc="π§ Model is thinking...")
|
| 308 |
response, prompt_tokens, completion_tokens, gen_time = vibe_model.generate_response(
|
| 309 |
prompt,
|
|
@@ -314,7 +366,7 @@ def generate_solution(prompt, temperature=0.6, max_tokens=8192, max_thinking_tok
|
|
| 314 |
|
| 315 |
progress(0.8, desc="π Formatting output...")
|
| 316 |
|
| 317 |
-
# Parse output
|
| 318 |
thinking, answer, code_blocks = parse_model_output(response)
|
| 319 |
|
| 320 |
# Format as HTML
|
|
@@ -341,7 +393,7 @@ with gr.Blocks(
|
|
| 341 |
|
| 342 |
**Optimized for**: Competitive programming (LeetCode, Codeforces, AtCoder) and algorithm challenges
|
| 343 |
|
| 344 |
-
β‘ **Powered by vLLM** | π― **Best for**: Python algorithmic problems with clear input/output specs
|
| 345 |
|
| 346 |
β οΈ **Note**: This model is specialized for competitive programming, not general software development
|
| 347 |
""")
|
|
@@ -364,10 +416,10 @@ with gr.Blocks(
|
|
| 364 |
)
|
| 365 |
max_tokens_slider = gr.Slider(
|
| 366 |
minimum=1024,
|
| 367 |
-
maximum=
|
| 368 |
-
value=
|
| 369 |
step=1024,
|
| 370 |
-
label="π Max Total Tokens"
|
| 371 |
)
|
| 372 |
max_thinking_slider = gr.Slider(
|
| 373 |
minimum=512,
|
|
@@ -383,6 +435,7 @@ with gr.Blocks(
|
|
| 383 |
- Higher thinking tokens (4096-8192) for complex problems requiring detailed reasoning
|
| 384 |
- Temperature 0.6 balances creativity and accuracy
|
| 385 |
- Loop detection is automatic - repetitive output will be truncated
|
|
|
|
| 386 |
""")
|
| 387 |
|
| 388 |
generate_btn = gr.Button("π Generate Solution", variant="primary", size="lg")
|
|
|
|
| 20 |
model="WeiboAI/VibeThinker-1.5B",
|
| 21 |
dtype="float16", # Use float16 instead of bfloat16 for T4
|
| 22 |
gpu_memory_utilization=0.85,
|
| 23 |
+
max_model_len=40960, # Full 40K context as per docs
|
| 24 |
enforce_eager=True, # Disable CUDA graphs for T4
|
| 25 |
disable_custom_all_reduce=True, # Avoid custom kernels
|
| 26 |
enable_prefix_caching=False, # Disable for stability
|
| 27 |
max_num_seqs=1, # Process one sequence at a time
|
| 28 |
trust_remote_code=True
|
| 29 |
)
|
| 30 |
+
print("β
vLLM model loaded successfully with T4-compatible settings (40K context)")
|
| 31 |
except Exception as e:
|
| 32 |
print(f"β Error loading model: {e}")
|
| 33 |
raise
|
| 34 |
|
| 35 |
+
def generate_response(self, prompt, temperature=0.6, max_tokens=16384, max_thinking_tokens=4096):
|
| 36 |
"""
|
| 37 |
Generate response with thinking length control and loop detection
|
| 38 |
|
|
|
|
| 106 |
|
| 107 |
def _detect_loop(self, text):
|
| 108 |
"""Detect if text contains repetitive loops"""
|
|
|
|
| 109 |
words = text.split()
|
| 110 |
if len(words) < 20:
|
| 111 |
return False
|
|
|
|
| 128 |
rest_start = i + length
|
| 129 |
rest = ' '.join(words[rest_start:])
|
| 130 |
if phrase in rest:
|
|
|
|
| 131 |
return ' '.join(words[:rest_start])
|
| 132 |
return text
|
| 133 |
|
| 134 |
def parse_model_output(text):
    r"""
    Parse model output to separate thinking and final answer.

    Code blocks are extracted ONLY from the final answer section, never
    from the thinking section.

    Args:
        text: Raw model output. May contain ``<think>``/``<thinking>`` tags,
            fenced code blocks, a LaTeX ``\boxed{...}`` answer and the
            "[Loop detected and truncated]" marker. ``None`` or an empty
            string yields an empty result.

    Returns: (thinking_content, answer_content, code_blocks)
        thinking_content: reasoning text ("" when none was identified).
        answer_content: "**Final Answer:** <boxed value>" when a non-empty
            ``\boxed{...}`` is present in the answer section, otherwise the
            answer section itself; a loop notice is appended when the loop
            marker was found in the input.
        code_blocks: list of (language, code) tuples for each fenced block
            in the answer section; language is "" when the fence has none.
    """
    if not text:
        return "", "", []

    loop_warning = ""
    if "[Loop detected and truncated]" in text:
        loop_warning = "\n\n\u26a0\ufe0f **Note**: Repetitive content was detected and removed"
        # Strip the marker whether or not it carries the warning emoji and
        # the markdown asterisks, so it can never leak into the answer.
        text = re.sub(
            r'(?:\u26a0\ufe0f?\s*)?\*?\[Loop detected and truncated\]\*?',
            '',
            text,
        )

    # Try to find explicit thinking delimiters
    thinking_patterns = [
        r'<think>(.*?)</think>',
        r'<thinking>(.*?)</thinking>',
    ]
    tag_flags = re.DOTALL | re.IGNORECASE

    thinking_content = ""
    answer_content = text

    for pattern in thinking_patterns:
        match = re.search(pattern, text, tag_flags)
        if match:
            thinking_content = match.group(1).strip()
            answer_content = re.sub(pattern, '', text, flags=tag_flags).strip()
            break

    # If no explicit thinking tags, try to detect reasoning section
    # Look for a natural break like "Solution:" or "Here's the code:"
    if not thinking_content:
        split_markers = [
            r'(.*?)(?=\n\n(?:Solution|Here\'s|Implementation|Code|Final).*?:)',
            r'(.*?)(?=\n\n```)',  # Before first code block
        ]
        reasoning_keywords = ('step', 'approach', 'idea', 'first', 'we can', 'let\'s')

        for pattern in split_markers:
            match = re.search(pattern, text, tag_flags)
            if not match:
                continue
            potential_thinking = match.group(1).strip()
            # Only treat as thinking if it's substantial (>100 chars) and
            # contains reasoning keywords
            if len(potential_thinking) <= 100:
                continue
            thinking_lower = potential_thinking.lower()
            if any(word in thinking_lower for word in reasoning_keywords):
                thinking_content = potential_thinking
                # Slice at the end of the matched span, not at the length of
                # the stripped string: leading whitespace in `text` would
                # otherwise shift the cut into the reasoning.
                answer_content = text[match.end(1):].strip()
                break

    # NOW extract code blocks ONLY from answer_content (not from thinking)
    code_pattern = r'```(\w+)?\n(.*?)```'
    code_blocks = re.findall(code_pattern, answer_content, re.DOTALL)

    # Extract final answer (boxed or use answer_content as-is)
    boxed = _extract_boxed(answer_content)
    if boxed is not None:
        final_answer = f"**Final Answer:** {boxed}"
    else:
        final_answer = answer_content

    final_answer += loop_warning

    return thinking_content, final_answer, code_blocks


def _extract_boxed(text):
    r"""Return the contents of the first non-empty ``\boxed{...}`` in text, or None.

    Braces are matched by depth so nested LaTeX such as
    ``\boxed{\frac{1}{2}}`` is returned whole. An unbalanced ``\boxed{``
    yields None.
    """
    opener = "\\boxed{"
    search_from = 0
    while True:
        start = text.find(opener, search_from)
        if start == -1:
            return None
        body_start = start + len(opener)
        depth = 1
        closed_at = -1
        for pos in range(body_start, len(text)):
            ch = text[pos]
            if ch == "{":
                depth += 1
            elif ch == "}":
                depth -= 1
                if depth == 0:
                    closed_at = pos
                    break
        if closed_at == -1:
            # Unbalanced to end of text; any later \boxed{ is inside it.
            return None
        content = text[body_start:closed_at]
        if content:
            return content
        # Empty \boxed{}: keep looking for a later, non-empty one.
        search_from = body_start
|
| 195 |
|
| 196 |
def format_output_html(thinking, answer, code_blocks, prompt_tokens, completion_tokens, generation_time):
|
| 197 |
"""
|
| 198 |
+
Format output as styled HTML - thinking is plain text, code blocks are from final answer only
|
| 199 |
"""
|
| 200 |
total_tokens = prompt_tokens + completion_tokens
|
| 201 |
thinking_tokens_est = len(thinking.split()) * 1.3 if thinking else 0
|
| 202 |
tokens_per_sec = completion_tokens / generation_time if generation_time > 0 else 0
|
| 203 |
|
| 204 |
+
# Build thinking section HTML - PLAIN TEXT, NO CODE PARSING
|
| 205 |
thinking_html = ""
|
| 206 |
if thinking:
|
| 207 |
+
# Escape any HTML in thinking to prevent rendering
|
| 208 |
+
thinking_escaped = thinking.replace('<', '<').replace('>', '>')
|
| 209 |
thinking_html = f"""
|
| 210 |
<details style="background: #f8f9fa; border: 2px solid #e9ecef; border-radius: 12px; padding: 20px; margin-bottom: 24px; box-shadow: 0 2px 4px rgba(0,0,0,0.05);">
|
| 211 |
<summary style="cursor: pointer; font-weight: 600; font-size: 16px; color: #495057; user-select: none; display: flex; align-items: center; gap: 8px;">
|
|
|
|
| 214 |
<span style="margin-left: auto; font-size: 12px; color: #6c757d;">Click to expand/collapse</span>
|
| 215 |
</summary>
|
| 216 |
<div style="margin-top: 16px; padding-top: 16px; border-top: 1px solid #dee2e6; color: #212529; line-height: 1.7; white-space: pre-wrap; font-size: 14px; font-family: 'SF Mono', Monaco, Consolas, monospace; background: #ffffff; padding: 16px; border-radius: 8px;">
|
| 217 |
+
{thinking_escaped}
|
| 218 |
</div>
|
| 219 |
</details>
|
| 220 |
"""
|
| 221 |
|
| 222 |
+
# Build code blocks HTML - ONLY from final answer
|
| 223 |
code_html = ""
|
| 224 |
if code_blocks:
|
| 225 |
code_blocks_html = ""
|
| 226 |
+
for idx, (lang, code) in enumerate(code_blocks):
|
| 227 |
lang_display = lang if lang else "code"
|
| 228 |
+
code_id = f"code_{idx}"
|
| 229 |
+
|
| 230 |
+
# Create downloadable version
|
| 231 |
+
code_clean = code.strip()
|
| 232 |
+
|
| 233 |
code_blocks_html += f"""
|
| 234 |
<div style="margin-bottom: 16px; background: #1e1e1e; border-radius: 12px; overflow: hidden; box-shadow: 0 4px 6px rgba(0,0,0,0.1);">
|
| 235 |
<div style="background: #2d2d2d; padding: 12px 20px; color: #ffffff; font-weight: 600; font-size: 13px; display: flex; justify-content: space-between; align-items: center; border-bottom: 1px solid #3d3d3d;">
|
| 236 |
<span>{lang_display}</span>
|
| 237 |
+
<div style="display: flex; gap: 8px;">
|
| 238 |
+
<button onclick="navigator.clipboard.writeText(document.getElementById('{code_id}').textContent)"
|
| 239 |
+
style="background: #4CAF50; color: white; border: none; padding: 6px 14px; border-radius: 6px; cursor: pointer; font-size: 12px; font-weight: 500; transition: background 0.2s;"
|
| 240 |
+
onmouseover="this.style.background='#45a049'"
|
| 241 |
+
onmouseout="this.style.background='#4CAF50'">
|
| 242 |
+
π Copy
|
| 243 |
+
</button>
|
| 244 |
+
<button onclick="downloadCode(document.getElementById('{code_id}').textContent, '{lang_display}')"
|
| 245 |
+
style="background: #2196F3; color: white; border: none; padding: 6px 14px; border-radius: 6px; cursor: pointer; font-size: 12px; font-weight: 500; transition: background 0.2s;"
|
| 246 |
+
onmouseover="this.style.background='#0b7dda'"
|
| 247 |
+
onmouseout="this.style.background='#2196F3'">
|
| 248 |
+
πΎ Download
|
| 249 |
+
</button>
|
| 250 |
+
</div>
|
| 251 |
</div>
|
| 252 |
+
<pre style="margin: 0; padding: 20px; color: #d4d4d4; overflow-x: auto; font-family: 'SF Mono', Monaco, Consolas, monospace; font-size: 14px; line-height: 1.6;"><code id="{code_id}">{code_clean}</code></pre>
|
| 253 |
</div>
|
| 254 |
"""
|
| 255 |
|
|
|
|
| 260 |
</h3>
|
| 261 |
{code_blocks_html}
|
| 262 |
</div>
|
| 263 |
+
|
| 264 |
+
<script>
|
| 265 |
+
function downloadCode(code, lang) {{
|
| 266 |
+
const extensions = {{
|
| 267 |
+
'python': 'py',
|
| 268 |
+
'javascript': 'js',
|
| 269 |
+
'java': 'java',
|
| 270 |
+
'cpp': 'cpp',
|
| 271 |
+
'c': 'c',
|
| 272 |
+
'html': 'html',
|
| 273 |
+
'css': 'css',
|
| 274 |
+
'typescript': 'ts',
|
| 275 |
+
'rust': 'rs',
|
| 276 |
+
'go': 'go',
|
| 277 |
+
}};
|
| 278 |
+
const ext = extensions[lang.toLowerCase()] || 'txt';
|
| 279 |
+
const filename = `solution.${{ext}}`;
|
| 280 |
+
|
| 281 |
+
const blob = new Blob([code], {{ type: 'text/plain' }});
|
| 282 |
+
const url = window.URL.createObjectURL(blob);
|
| 283 |
+
const a = document.createElement('a');
|
| 284 |
+
a.href = url;
|
| 285 |
+
a.download = filename;
|
| 286 |
+
document.body.appendChild(a);
|
| 287 |
+
a.click();
|
| 288 |
+
document.body.removeChild(a);
|
| 289 |
+
window.URL.revokeObjectURL(url);
|
| 290 |
+
}}
|
| 291 |
+
</script>
|
| 292 |
"""
|
| 293 |
|
| 294 |
html = f"""
|
|
|
|
| 325 |
</div>
|
| 326 |
</div>
|
| 327 |
|
| 328 |
+
<!-- Thinking Section (Plain Text Only) -->
|
| 329 |
{thinking_html}
|
| 330 |
|
| 331 |
<!-- Answer Section -->
|
|
|
|
| 338 |
</div>
|
| 339 |
</div>
|
| 340 |
|
| 341 |
+
<!-- Code Blocks (From Final Answer Only) -->
|
| 342 |
{code_html}
|
| 343 |
|
| 344 |
</div>
|
|
|
|
| 346 |
return html
|
| 347 |
|
| 348 |
# Initialize model
|
| 349 |
+
print("π Initializing VibeThinker with vLLM (T4-optimized, 40K context)...")
|
| 350 |
vibe_model = VibeThinkerVLLM()
|
| 351 |
|
| 352 |
+
def generate_solution(prompt, temperature=0.6, max_tokens=16384, max_thinking_tokens=4096, progress=gr.Progress()):
|
| 353 |
"""Generate and format solution with progress tracking"""
|
| 354 |
if not prompt.strip():
|
| 355 |
return "<p style='color: #dc3545; font-size: 16px; padding: 20px;'>β οΈ Please enter a problem to solve.</p>"
|
| 356 |
|
|
|
|
| 357 |
progress(0, desc="π Initializing generation...")
|
| 358 |
|
|
|
|
| 359 |
progress(0.3, desc="π§ Model is thinking...")
|
| 360 |
response, prompt_tokens, completion_tokens, gen_time = vibe_model.generate_response(
|
| 361 |
prompt,
|
|
|
|
| 366 |
|
| 367 |
progress(0.8, desc="π Formatting output...")
|
| 368 |
|
| 369 |
+
# Parse output - thinking stays as plain text, code only from answer
|
| 370 |
thinking, answer, code_blocks = parse_model_output(response)
|
| 371 |
|
| 372 |
# Format as HTML
|
|
|
|
| 393 |
|
| 394 |
**Optimized for**: Competitive programming (LeetCode, Codeforces, AtCoder) and algorithm challenges
|
| 395 |
|
| 396 |
+
β‘ **Powered by vLLM** (40K context) | π― **Best for**: Python algorithmic problems with clear input/output specs
|
| 397 |
|
| 398 |
β οΈ **Note**: This model is specialized for competitive programming, not general software development
|
| 399 |
""")
|
|
|
|
| 416 |
)
|
| 417 |
max_tokens_slider = gr.Slider(
|
| 418 |
minimum=1024,
|
| 419 |
+
maximum=40960,
|
| 420 |
+
value=16384,
|
| 421 |
step=1024,
|
| 422 |
+
label="π Max Total Tokens (40K max)"
|
| 423 |
)
|
| 424 |
max_thinking_slider = gr.Slider(
|
| 425 |
minimum=512,
|
|
|
|
| 435 |
- Higher thinking tokens (4096-8192) for complex problems requiring detailed reasoning
|
| 436 |
- Temperature 0.6 balances creativity and accuracy
|
| 437 |
- Loop detection is automatic - repetitive output will be truncated
|
| 438 |
+
- Code blocks shown are from final solution only (not from reasoning process)
|
| 439 |
""")
|
| 440 |
|
| 441 |
generate_btn = gr.Button("π Generate Solution", variant="primary", size="lg")
|