akhaliq (HF Staff) committed
Commit 0498411 · 1 Parent(s): 2948440

update gemini

Files changed (4)
  1. anycoder_app/deploy.py +145 -64
  2. anycoder_app/models.py +2 -1
  3. backend_api.py +76 -71
  4. backend_models.py +337 -0
anycoder_app/deploy.py CHANGED
@@ -97,6 +97,44 @@ def generation_code(query: Optional[str], _setting: Dict[str, str], _history: Op
         yield (error_message, _history or [], history_to_chatbot_messages(_history or []))
         return
 
+    # CRITICAL: Catch any HuggingFace API errors for non-HF models like Gemini 3
+    try:
+        yield from _generation_code_impl(query, _setting, _history, _current_model, language, provider, profile, token, code_output, history_output, history)
+    except Exception as e:
+        import traceback
+        error_str = str(e)
+        if "Repository Not Found" in error_str and "inferenceProviderMapping" in error_str:
+            # This is a HuggingFace API error for a non-HF model
+            model_id = _current_model.get('id', 'unknown')
+
+            # Get full traceback to see where the call originated
+            tb = traceback.format_exc()
+            print(f"DEBUG: HuggingFace API error for model {model_id}")
+            print(f"DEBUG: Full traceback:\n{tb}")
+
+            error_message = f"""❌ Error: Attempted to validate model '{model_id}' against HuggingFace API, but this is not a HuggingFace model.
+
+This error should not occur. Please check the server logs for the full traceback.
+
+- Model: {model_id}
+- Error: {error_str}
+
+Try reloading the page and selecting the model again."""
+            if code_output is not None and history_output is not None:
+                yield {
+                    code_output: error_message,
+                    history_output: history_to_chatbot_messages(_history or []),
+                }
+            else:
+                yield (error_message, _history or [], history_to_chatbot_messages(_history or []))
+            return
+        else:
+            # Re-raise other errors
+            raise
+
+def _generation_code_impl(query: Optional[str], _setting: Dict[str, str], _history: Optional[History], _current_model: Dict, language: str = "html", provider: str = "auto", profile: Optional[gr.OAuthProfile] = None, token: Optional[gr.OAuthToken] = None, code_output=None, history_output=None, history=None):
+    """Internal implementation of generation_code"""
+
     if query is None:
         query = ''
     if _history is None:
@@ -138,11 +176,16 @@ def generation_code(query: Optional[str], _setting: Dict[str, str], _history: Op
 
     # If this is a modification request, try to apply search/replace first
     if has_existing_content and query.strip():
-        try:
-            # Use the current model to generate search/replace instructions
-            client = get_inference_client(_current_model['id'], provider)
-
-            system_prompt = """You are a code editor assistant. Given existing code and modification instructions, generate EXACT search/replace blocks.
+        # Skip search/replace for models that use native clients (non-OpenAI-compatible)
+        # These models need the full generation flow to work properly
+        native_client_models = ["gemini-3-pro-preview"]
+
+        if _current_model['id'] not in native_client_models:
+            try:
+                # Use the current model to generate search/replace instructions
+                client = get_inference_client(_current_model['id'], provider)
+
+                system_prompt = """You are a code editor assistant. Given existing code and modification instructions, generate EXACT search/replace blocks.
 
 CRITICAL REQUIREMENTS:
 1. Use EXACTLY these markers: <<<<<<< SEARCH, =======, >>>>>>> REPLACE
@@ -163,73 +206,73 @@ Example format:
 }
 >>>>>>> REPLACE"""
 
-            user_prompt = f"""Existing code:
+                user_prompt = f"""Existing code:
 {last_assistant_msg}
 Modification instructions:
 {query}
 
 Generate the exact search/replace blocks needed to make these changes."""
 
-            messages = [
-                {"role": "system", "content": system_prompt},
-                {"role": "user", "content": user_prompt}
-            ]
-
-            # Generate search/replace instructions
-            if _current_model.get('type') == 'openai':
-                response = client.chat.completions.create(
-                    model=get_real_model_id(_current_model['id']),
-                    messages=messages,
-                    max_tokens=4000,
-                    temperature=0.1
-                )
-                changes_text = response.choices[0].message.content
-            elif _current_model.get('type') == 'mistral':
-                response = client.chat.complete(
-                    model=get_real_model_id(_current_model['id']),
-                    messages=messages,
-                    max_tokens=4000,
-                    temperature=0.1
-                )
-                changes_text = response.choices[0].message.content
-            else:  # Hugging Face or other
-                completion = client.chat.completions.create(
-                    model=get_real_model_id(_current_model['id']),
-                    messages=messages,
-                    max_tokens=4000,
-                    temperature=0.1
-                )
-                changes_text = completion.choices[0].message.content
-
-            # Apply the search/replace changes
-            if language == "transformers.js" and ('=== index.html ===' in last_assistant_msg):
-                modified_content = apply_transformers_js_search_replace_changes(last_assistant_msg, changes_text)
-            else:
-                modified_content = apply_search_replace_changes(last_assistant_msg, changes_text)
-
-            # If changes were successfully applied, return the modified content
-            if modified_content != last_assistant_msg:
-                _history.append([query, modified_content])
-
-                # Generate deployment message instead of preview
-                deploy_message = f"""
-                <div style='padding: 1.5em; text-align: center; background: #f0f9ff; border: 2px solid #0ea5e9; border-radius: 10px; color: #0c4a6e;'>
-                    <h3 style='margin-top: 0; color: #0ea5e9;'>✅ Code Updated Successfully!</h3>
-                    <p style='margin: 0.5em 0; font-size: 1.1em;'>Your {language.upper()} code has been modified and is ready for deployment.</p>
-                    <p style='margin: 0.5em 0; font-weight: bold;'>👉 Use the Deploy button in the sidebar to publish your app!</p>
-                </div>
-                """
-
-                yield {
-                    code_output: modified_content,
-                    history: _history,
-                    history_output: history_to_chatbot_messages(_history),
-                }
-                return
-
-        except Exception as e:
-            print(f"Search/replace failed, falling back to normal generation: {e}")
-            # If search/replace fails, continue with normal generation
+                messages = [
+                    {"role": "system", "content": system_prompt},
+                    {"role": "user", "content": user_prompt}
+                ]
+
+                # Generate search/replace instructions
+                if _current_model.get('type') == 'openai':
+                    response = client.chat.completions.create(
+                        model=get_real_model_id(_current_model['id']),
+                        messages=messages,
+                        max_tokens=4000,
+                        temperature=0.1
+                    )
+                    changes_text = response.choices[0].message.content
+                elif _current_model.get('type') == 'mistral':
+                    response = client.chat.complete(
+                        model=get_real_model_id(_current_model['id']),
+                        messages=messages,
+                        max_tokens=4000,
+                        temperature=0.1
+                    )
+                    changes_text = response.choices[0].message.content
+                else:  # Hugging Face or other
+                    completion = client.chat.completions.create(
+                        model=get_real_model_id(_current_model['id']),
+                        messages=messages,
+                        max_tokens=4000,
+                        temperature=0.1
+                    )
+                    changes_text = completion.choices[0].message.content
+
+                # Apply the search/replace changes
+                if language == "transformers.js" and ('=== index.html ===' in last_assistant_msg):
+                    modified_content = apply_transformers_js_search_replace_changes(last_assistant_msg, changes_text)
+                else:
+                    modified_content = apply_search_replace_changes(last_assistant_msg, changes_text)
+
+                # If changes were successfully applied, return the modified content
+                if modified_content != last_assistant_msg:
+                    _history.append([query, modified_content])
+
+                    # Generate deployment message instead of preview
+                    deploy_message = f"""
+                    <div style='padding: 1.5em; text-align: center; background: #f0f9ff; border: 2px solid #0ea5e9; border-radius: 10px; color: #0c4a6e;'>
+                        <h3 style='margin-top: 0; color: #0ea5e9;'>✅ Code Updated Successfully!</h3>
+                        <p style='margin: 0.5em 0; font-size: 1.1em;'>Your {language.upper()} code has been modified and is ready for deployment.</p>
+                        <p style='margin: 0.5em 0; font-weight: bold;'>👉 Use the Deploy button in the sidebar to publish your app!</p>
+                    </div>
+                    """
+
+                    yield {
+                        code_output: modified_content,
+                        history: _history,
+                        history_output: history_to_chatbot_messages(_history),
+                    }
+                    return
+
+            except Exception as e:
+                print(f"Search/replace failed, falling back to normal generation: {e}")
+                # If search/replace fails, continue with normal generation
 
     # Create/lookup a session id for temp-file tracking and cleanup
     if _setting is not None and isinstance(_setting, dict):
@@ -415,7 +458,7 @@ Generate the exact search/replace blocks needed to make these changes."""
         }
         return
 
-    # Use dynamic client based on selected model
+    # Use dynamic client based on selected model
     client = get_inference_client(_current_model["id"], provider)
 
     messages.append({'role': 'user', 'content': enhanced_query})
@@ -2293,6 +2336,25 @@ def _fetch_inference_provider_code(model_id: str) -> Optional[str]:
     Returns:
         The code snippet if model has inference providers, None otherwise
     """
+    # Skip non-HuggingFace models (external APIs)
+    non_hf_models = [
+        "gemini-3-pro-preview", "gemini-2.5-flash", "gemini-2.5-pro",
+        "gemini-flash-latest", "gemini-flash-lite-latest",
+        "gpt-5", "gpt-5.1", "gpt-5.1-instant", "gpt-5.1-codex", "gpt-5.1-codex-mini",
+        "grok-4", "Grok-Code-Fast-1",
+        "claude-opus-4.1", "claude-sonnet-4.5", "claude-haiku-4.5",
+        "qwen3-30b-a3b-instruct-2507", "qwen3-30b-a3b-thinking-2507",
+        "qwen3-coder-30b-a3b-instruct", "qwen3-max-preview",
+        "kimi-k2-turbo-preview", "step-3",
+        "codestral-2508", "mistral-medium-2508",
+        "stealth-model-1",
+        "openrouter/sonoma-dusk-alpha", "openrouter/sonoma-sky-alpha",
+        "openrouter/sherlock-dash-alpha", "openrouter/sherlock-think-alpha"
+    ]
+
+    if model_id in non_hf_models:
+        return None
+
     try:
         # Fetch trending models data from HuggingFace API
         response = requests.get("https://huggingface.co/api/trending", timeout=10)
@@ -2378,6 +2440,25 @@ def import_model_from_hf(model_id: str, prefer_local: bool = False) -> Tuple[str
     if not model_id or model_id == "":
         return "Please select a model.", "", "python", ""
 
+    # Skip non-HuggingFace models (external APIs) - these are not importable
+    non_hf_models = [
+        "gemini-3-pro-preview", "gemini-2.5-flash", "gemini-2.5-pro",
+        "gemini-flash-latest", "gemini-flash-lite-latest",
+        "gpt-5", "gpt-5.1", "gpt-5.1-instant", "gpt-5.1-codex", "gpt-5.1-codex-mini",
+        "grok-4", "Grok-Code-Fast-1",
+        "claude-opus-4.1", "claude-sonnet-4.5", "claude-haiku-4.5",
+        "qwen3-30b-a3b-instruct-2507", "qwen3-30b-a3b-thinking-2507",
+        "qwen3-coder-30b-a3b-instruct", "qwen3-max-preview",
+        "kimi-k2-turbo-preview", "step-3",
+        "codestral-2508", "mistral-medium-2508",
+        "stealth-model-1",
+        "openrouter/sonoma-dusk-alpha", "openrouter/sonoma-sky-alpha",
+        "openrouter/sherlock-dash-alpha", "openrouter/sherlock-think-alpha"
+    ]
+
+    if model_id in non_hf_models:
+        return f"❌ `{model_id}` is not a HuggingFace model and cannot be imported. This model is accessed via external API.", "", "python", ""
+
     # Build model URL
     model_url = f"https://huggingface.co/{model_id}"
 
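Reviewer note: the system prompt above pins the edit protocol to literal <<<<<<< SEARCH / ======= / >>>>>>> REPLACE markers. For readers who have not seen apply_search_replace_changes (its implementation is outside this diff), a minimal sketch of how blocks in that format can be parsed and applied is shown below. The function is illustrative only and is not the repo's implementation.

# Illustrative sketch only -- not the implementation in anycoder_app.
# Parses <<<<<<< SEARCH / ======= / >>>>>>> REPLACE blocks and applies
# each one to the original text, replacing the first occurrence.
def apply_search_replace_sketch(original: str, changes_text: str) -> str:
    result = original
    for block in changes_text.split("<<<<<<< SEARCH")[1:]:
        body, _, _ = block.partition(">>>>>>> REPLACE")
        if "=======" not in body:
            continue  # malformed block, skip it
        search_part, _, replace_part = body.partition("=======")
        search_text = search_part.strip("\n")
        replace_text = replace_part.strip("\n")
        if search_text and search_text in result:
            result = result.replace(search_text, replace_text, 1)
    return result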
 
anycoder_app/models.py CHANGED
@@ -23,9 +23,10 @@ Messages = List[Dict[str, str]]
 def get_inference_client(model_id, provider="auto"):
     """Return an InferenceClient with provider based on model_id and user selection."""
     if model_id == "gemini-3-pro-preview":
-        # Use native Google GenAI client for Gemini 3 Pro Preview
+        # Use native Google GenAI client for Gemini 3 Pro Preview with v1alpha API
         return genai.Client(
             api_key=os.getenv("GEMINI_API_KEY"),
+            http_options={'api_version': 'v1alpha'}
         )
     elif model_id == "qwen3-30b-a3b-instruct-2507":
         # Use DashScope OpenAI client
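Reviewer note: the only functional change here is http_options={'api_version': 'v1alpha'}, which routes Gemini 3 Pro Preview calls to the v1alpha endpoint that accepts thinking-level configuration. Below is a minimal smoke-test sketch, reusing the same google-genai calls that backend_models.py makes; the prompt string is a placeholder.

import os
from google import genai
from google.genai import types

# Sketch: construct the v1alpha client exactly as get_inference_client does
client = genai.Client(
    api_key=os.getenv("GEMINI_API_KEY"),
    http_options={'api_version': 'v1alpha'}
)

# One non-streaming call with the same thinking config used in backend_models.py
response = client.models.generate_content(
    model="gemini-3-pro-preview",
    contents="Say hello in one word.",  # placeholder prompt
    config=types.GenerateContentConfig(
        thinkingConfig=types.ThinkingConfig(thinkingLevel="HIGH")
    ),
)
print(response.text)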
backend_api.py CHANGED
@@ -19,6 +19,15 @@ import os
 from huggingface_hub import InferenceClient
 import httpx
 
+# Import model handling from backend_models
+from backend_models import (
+    get_inference_client,
+    get_real_model_id,
+    create_gemini3_messages,
+    is_native_sdk_model,
+    is_mistral_model
+)
+
 # Import system prompts from standalone backend_prompts.py
 # No dependencies on Gradio or heavy libraries
 print("[Startup] Loading system prompts from backend_prompts...")
@@ -333,16 +342,20 @@ async def generate_code(
 
     async def event_stream() -> AsyncGenerator[str, None]:
         """Stream generated code chunks"""
+        # Use the model_id from outer scope
+        selected_model_id = model_id
+
         try:
             # Find the selected model
             selected_model = None
             for model in AVAILABLE_MODELS:
-                if model["id"] == model_id:
+                if model["id"] == selected_model_id:
                     selected_model = model
                     break
 
             if not selected_model:
                 selected_model = AVAILABLE_MODELS[0]
+                selected_model_id = selected_model["id"]
 
             # Track generated code
             generated_code = ""
@@ -360,62 +373,13 @@ async def generate_code(
 
             print(f"[Generate] Using {language} prompt for query: {query[:100]}...")
 
-            # Get the real model ID
-            actual_model_id = selected_model["id"]
-
-            # Determine which provider/API to use based on model ID
-            if actual_model_id.startswith("openrouter/"):
-                # OpenRouter models - use OpenAI client directly
-                from openai import OpenAI
-                api_key = os.getenv("OPENROUTER_API_KEY") or os.getenv("HF_TOKEN")
-                client = OpenAI(
-                    base_url="https://openrouter.ai/api/v1",
-                    api_key=api_key,
-                    default_headers={
-                        "HTTP-Referer": "https://huggingface.co/spaces/akhaliq/anycoder",
-                        "X-Title": "AnyCoder"
-                    }
-                )
-                print(f"[Generate] Using OpenRouter with model: {actual_model_id}")
-            elif actual_model_id == "MiniMaxAI/MiniMax-M2":
-                # MiniMax M2 via HuggingFace with Novita provider
-                hf_token = os.getenv("HF_TOKEN")
-                if not hf_token:
-                    error_data = json.dumps({
-                        "type": "error",
-                        "message": "HF_TOKEN environment variable not set. Please set it in your terminal.",
-                        "timestamp": datetime.now().isoformat()
-                    })
-                    yield f"data: {error_data}\n\n"
-                    return
-
-                # Use OpenAI client with HuggingFace router
-                from openai import OpenAI
-                client = OpenAI(
-                    base_url="https://router.huggingface.co/v1",
-                    api_key=hf_token,
-                    default_headers={
-                        "X-HF-Bill-To": "huggingface"
-                    }
-                )
-                # Add :novita suffix for the API call
-                actual_model_id = "MiniMaxAI/MiniMax-M2:novita"
-                print(f"[Generate] Using HuggingFace router for MiniMax M2")
-            elif actual_model_id.startswith("deepseek-ai/"):
-                # DeepSeek models via HuggingFace - use OpenAI client for better streaming
-                from openai import OpenAI
-                client = OpenAI(
-                    base_url="https://api-inference.huggingface.co/v1",
-                    api_key=os.getenv("HF_TOKEN")
-                )
-                print(f"[Generate] Using HuggingFace Inference API for DeepSeek")
-            elif actual_model_id == "qwen3-max-preview":
-                # Qwen via DashScope (would need separate implementation)
-                # For now, fall back to HF
-                client = InferenceClient(token=os.getenv("HF_TOKEN"))
-            else:
-                # Default: HuggingFace models
-                client = InferenceClient(token=os.getenv("HF_TOKEN"))
+            # Get the client using backend_models
+            print(f"[Generate] Getting client for model: {selected_model_id}")
+            client = get_inference_client(selected_model_id, provider)
+
+            # Get the real model ID with provider suffixes
+            actual_model_id = get_real_model_id(selected_model_id)
+            print(f"[Generate] Using model ID: {actual_model_id}")
 
             # Prepare messages
             messages = [
@@ -425,26 +389,67 @@ async def generate_code(
 
             # Stream the response
             try:
-                stream = client.chat.completions.create(
-                    model=actual_model_id,
-                    messages=messages,
-                    temperature=0.7,
-                    max_tokens=10000,
-                    stream=True
-                )
+                # Handle Gemini 3 Pro Preview with native SDK
+                if selected_model_id == "gemini-3-pro-preview":
+                    print("[Generate] Using Gemini 3 native SDK")
+                    contents, config = create_gemini3_messages(messages)
+
+                    stream = client.models.generate_content_stream(
+                        model="gemini-3-pro-preview",
+                        contents=contents,
+                        config=config,
+                    )
+
+                # Handle Mistral models with different API
+                elif is_mistral_model(selected_model_id):
+                    print("[Generate] Using Mistral SDK")
+                    stream = client.chat.stream(
+                        model=actual_model_id,
+                        messages=messages,
+                        max_tokens=10000
+                    )
+
+                # All other models use OpenAI-compatible API
+                else:
+                    stream = client.chat.completions.create(
+                        model=actual_model_id,
+                        messages=messages,
+                        temperature=0.7,
+                        max_tokens=10000,
+                        stream=True
+                    )
 
                 chunk_count = 0
                 print(f"[Generate] Starting to stream from {actual_model_id}...")
 
                 for chunk in stream:
-                    # Check if choices array has elements before accessing
-                    if (hasattr(chunk, 'choices') and
-                        chunk.choices and
-                        len(chunk.choices) > 0 and
-                        hasattr(chunk.choices[0], 'delta') and
-                        hasattr(chunk.choices[0].delta, 'content') and
-                        chunk.choices[0].delta.content):
-                        content = chunk.choices[0].delta.content
+                    # Handle different response formats
+                    chunk_content = None
+
+                    if selected_model_id == "gemini-3-pro-preview":
+                        # Gemini native SDK format: chunk.text
+                        if hasattr(chunk, 'text') and chunk.text:
+                            chunk_content = chunk.text
+                    elif is_mistral_model(selected_model_id):
+                        # Mistral format: chunk.data.choices[0].delta.content
+                        if (hasattr(chunk, "data") and chunk.data and
+                            hasattr(chunk.data, "choices") and chunk.data.choices and
+                            hasattr(chunk.data.choices[0], "delta") and
+                            hasattr(chunk.data.choices[0].delta, "content") and
+                            chunk.data.choices[0].delta.content is not None):
+                            chunk_content = chunk.data.choices[0].delta.content
+                    else:
+                        # OpenAI format: chunk.choices[0].delta.content
+                        if (hasattr(chunk, 'choices') and
+                            chunk.choices and
+                            len(chunk.choices) > 0 and
+                            hasattr(chunk.choices[0], 'delta') and
+                            hasattr(chunk.choices[0].delta, 'content') and
+                            chunk.choices[0].delta.content):
+                            chunk_content = chunk.choices[0].delta.content
+
+                    if chunk_content:
+                        content = chunk_content
                         generated_code += content
                         chunk_count += 1
 
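Reviewer note: the streaming loop now normalizes three chunk shapes (Gemini's chunk.text, Mistral's chunk.data.choices[0].delta.content, and the OpenAI-style chunk.choices[0].delta.content). A sketch of the same logic factored into a helper is shown below; extract_chunk_text is not part of this commit, just a compact restatement of the branch above, and it assumes is_mistral_model is imported from backend_models as in the new import block.

from typing import Optional

def extract_chunk_text(chunk, selected_model_id: str) -> Optional[str]:
    """Sketch: normalize a streaming chunk to text across the three client families."""
    if selected_model_id == "gemini-3-pro-preview":
        # Gemini native SDK puts the text directly on the chunk
        return getattr(chunk, "text", None)
    if is_mistral_model(selected_model_id):
        # Mistral SDK wraps an OpenAI-style payload in chunk.data
        data = getattr(chunk, "data", None)
        if data is not None and getattr(data, "choices", None):
            return data.choices[0].delta.content
        return None
    # OpenAI-compatible clients
    if getattr(chunk, "choices", None):
        return chunk.choices[0].delta.content
    return None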
 
backend_models.py ADDED
@@ -0,0 +1,337 @@
+"""
+Standalone model inference and client management for AnyCoder Backend API.
+No Gradio dependencies - works with FastAPI/backend only.
+"""
+import os
+from typing import Optional
+
+from openai import OpenAI
+from mistralai import Mistral
+
+# Import genai for Gemini 3
+try:
+    from google import genai
+    from google.genai import types
+    GEMINI_AVAILABLE = True
+except ImportError:
+    GEMINI_AVAILABLE = False
+    print("WARNING: google-genai not available, Gemini 3 will not work")
+
+def get_inference_client(model_id: str, provider: str = "auto"):
+    """
+    Return an appropriate client based on model_id.
+
+    For Gemini 3: Returns genai.Client (native Google SDK)
+    For others: Returns OpenAI-compatible client or raises error
+    """
+    if model_id == "gemini-3-pro-preview":
+        if not GEMINI_AVAILABLE:
+            raise ImportError("google-genai package required for Gemini 3. Install with: pip install google-genai")
+        # Use native Google GenAI client for Gemini 3 Pro Preview with v1alpha API
+        api_key = os.getenv("GEMINI_API_KEY")
+        if not api_key:
+            raise ValueError("GEMINI_API_KEY environment variable required for Gemini 3")
+        return genai.Client(
+            api_key=api_key,
+            http_options={'api_version': 'v1alpha'}
+        )
+
+    elif model_id == "qwen3-30b-a3b-instruct-2507":
+        # Use DashScope OpenAI client
+        return OpenAI(
+            api_key=os.getenv("DASHSCOPE_API_KEY"),
+            base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
+        )
+
+    elif model_id == "qwen3-30b-a3b-thinking-2507":
+        # Use DashScope OpenAI client for Thinking model
+        return OpenAI(
+            api_key=os.getenv("DASHSCOPE_API_KEY"),
+            base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
+        )
+
+    elif model_id == "qwen3-coder-30b-a3b-instruct":
+        # Use DashScope OpenAI client for Coder model
+        return OpenAI(
+            api_key=os.getenv("DASHSCOPE_API_KEY"),
+            base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
+        )
+
+    elif model_id == "gpt-5":
+        # Use Poe (OpenAI-compatible) client for GPT-5 model
+        return OpenAI(
+            api_key=os.getenv("POE_API_KEY"),
+            base_url="https://api.poe.com/v1"
+        )
+
+    elif model_id == "gpt-5.1":
+        # Use Poe (OpenAI-compatible) client for GPT-5.1 model
+        return OpenAI(
+            api_key=os.getenv("POE_API_KEY"),
+            base_url="https://api.poe.com/v1"
+        )
+
+    elif model_id == "gpt-5.1-instant":
+        # Use Poe (OpenAI-compatible) client for GPT-5.1 Instant model
+        return OpenAI(
+            api_key=os.getenv("POE_API_KEY"),
+            base_url="https://api.poe.com/v1"
+        )
+
+    elif model_id == "gpt-5.1-codex":
+        # Use Poe (OpenAI-compatible) client for GPT-5.1 Codex model
+        return OpenAI(
+            api_key=os.getenv("POE_API_KEY"),
+            base_url="https://api.poe.com/v1"
+        )
+
+    elif model_id == "gpt-5.1-codex-mini":
+        # Use Poe (OpenAI-compatible) client for GPT-5.1 Codex Mini model
+        return OpenAI(
+            api_key=os.getenv("POE_API_KEY"),
+            base_url="https://api.poe.com/v1"
+        )
+
+    elif model_id == "grok-4":
+        # Use Poe (OpenAI-compatible) client for Grok-4 model
+        return OpenAI(
+            api_key=os.getenv("POE_API_KEY"),
+            base_url="https://api.poe.com/v1"
+        )
+
+    elif model_id == "Grok-Code-Fast-1":
+        # Use Poe (OpenAI-compatible) client for Grok-Code-Fast-1 model
+        return OpenAI(
+            api_key=os.getenv("POE_API_KEY"),
+            base_url="https://api.poe.com/v1"
+        )
+
+    elif model_id == "claude-opus-4.1":
+        # Use Poe (OpenAI-compatible) client for Claude-Opus-4.1
+        return OpenAI(
+            api_key=os.getenv("POE_API_KEY"),
+            base_url="https://api.poe.com/v1"
+        )
+
+    elif model_id == "claude-sonnet-4.5":
+        # Use Poe (OpenAI-compatible) client for Claude-Sonnet-4.5
+        return OpenAI(
+            api_key=os.getenv("POE_API_KEY"),
+            base_url="https://api.poe.com/v1"
+        )
+
+    elif model_id == "claude-haiku-4.5":
+        # Use Poe (OpenAI-compatible) client for Claude-Haiku-4.5
+        return OpenAI(
+            api_key=os.getenv("POE_API_KEY"),
+            base_url="https://api.poe.com/v1"
+        )
+
+    elif model_id == "qwen3-max-preview":
+        # Use DashScope International OpenAI client for Qwen3 Max Preview
+        return OpenAI(
+            api_key=os.getenv("DASHSCOPE_API_KEY"),
+            base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
+        )
+
+    elif model_id.startswith("openrouter/"):
+        # OpenRouter models
+        return OpenAI(
+            api_key=os.getenv("OPENROUTER_API_KEY"),
+            base_url="https://openrouter.ai/api/v1",
+        )
+
+    elif model_id == "MiniMaxAI/MiniMax-M2":
+        # Use HuggingFace Router with Novita provider for MiniMax M2 model
+        return OpenAI(
+            base_url="https://router.huggingface.co/v1",
+            api_key=os.getenv("HF_TOKEN"),
+            default_headers={"X-HF-Bill-To": "huggingface"}
+        )
+
+    elif model_id == "step-3":
+        # Use StepFun API client for Step-3 model
+        return OpenAI(
+            api_key=os.getenv("STEP_API_KEY"),
+            base_url="https://api.stepfun.com/v1"
+        )
+
+    elif model_id == "codestral-2508" or model_id == "mistral-medium-2508":
+        # Use Mistral client for Mistral models
+        return Mistral(api_key=os.getenv("MISTRAL_API_KEY"))
+
+    elif model_id == "gemini-2.5-flash":
+        # Use Google Gemini (OpenAI-compatible) client
+        return OpenAI(
+            api_key=os.getenv("GEMINI_API_KEY"),
+            base_url="https://generativelanguage.googleapis.com/v1beta/openai/",
+        )
+
+    elif model_id == "gemini-2.5-pro":
+        # Use Google Gemini Pro (OpenAI-compatible) client
+        return OpenAI(
+            api_key=os.getenv("GEMINI_API_KEY"),
+            base_url="https://generativelanguage.googleapis.com/v1beta/openai/",
+        )
+
+    elif model_id == "gemini-flash-latest":
+        # Use Google Gemini Flash Latest (OpenAI-compatible) client
+        return OpenAI(
+            api_key=os.getenv("GEMINI_API_KEY"),
+            base_url="https://generativelanguage.googleapis.com/v1beta/openai/",
+        )
+
+    elif model_id == "gemini-flash-lite-latest":
+        # Use Google Gemini Flash Lite Latest (OpenAI-compatible) client
+        return OpenAI(
+            api_key=os.getenv("GEMINI_API_KEY"),
+            base_url="https://generativelanguage.googleapis.com/v1beta/openai/",
+        )
+
+    elif model_id == "kimi-k2-turbo-preview":
+        # Use Moonshot AI (OpenAI-compatible) client for Kimi K2 Turbo (Preview)
+        return OpenAI(
+            api_key=os.getenv("MOONSHOT_API_KEY"),
+            base_url="https://api.moonshot.ai/v1",
+        )
+
+    elif model_id == "moonshotai/Kimi-K2-Thinking":
+        # Use HuggingFace Router with Novita provider
+        return OpenAI(
+            base_url="https://router.huggingface.co/v1",
+            api_key=os.getenv("HF_TOKEN"),
+            default_headers={"X-HF-Bill-To": "huggingface"}
+        )
+
+    elif model_id == "moonshotai/Kimi-K2-Instruct":
+        # Use HuggingFace Router with Groq provider
+        return OpenAI(
+            base_url="https://router.huggingface.co/v1",
+            api_key=os.getenv("HF_TOKEN"),
+            default_headers={"X-HF-Bill-To": "huggingface"}
+        )
+
+    elif model_id.startswith("deepseek-ai/"):
+        # DeepSeek models via HuggingFace Router with Novita provider
+        return OpenAI(
+            base_url="https://router.huggingface.co/v1",
+            api_key=os.getenv("HF_TOKEN"),
+            default_headers={"X-HF-Bill-To": "huggingface"}
+        )
+
+    elif model_id.startswith("zai-org/GLM-4"):
+        # GLM models via HuggingFace Router
+        return OpenAI(
+            base_url="https://router.huggingface.co/v1",
+            api_key=os.getenv("HF_TOKEN"),
+            default_headers={"X-HF-Bill-To": "huggingface"}
+        )
+
+    elif model_id == "stealth-model-1":
+        # Use stealth model with generic configuration
+        api_key = os.getenv("STEALTH_MODEL_1_API_KEY")
+        if not api_key:
+            raise ValueError("STEALTH_MODEL_1_API_KEY environment variable is required")
+
+        base_url = os.getenv("STEALTH_MODEL_1_BASE_URL")
+        if not base_url:
+            raise ValueError("STEALTH_MODEL_1_BASE_URL environment variable is required")
+
+        return OpenAI(
+            api_key=api_key,
+            base_url=base_url,
+        )
+
+    else:
+        # Unknown model - try HuggingFace Inference API
+        return OpenAI(
+            base_url="https://api-inference.huggingface.co/v1",
+            api_key=os.getenv("HF_TOKEN")
+        )
+
+
+def get_real_model_id(model_id: str) -> str:
+    """Get the real model ID with provider suffixes if needed"""
+    if model_id == "stealth-model-1":
+        # Get the real model ID from environment variable
+        real_model_id = os.getenv("STEALTH_MODEL_1_ID")
+        if not real_model_id:
+            raise ValueError("STEALTH_MODEL_1_ID environment variable is required")
+        return real_model_id
+
+    elif model_id == "zai-org/GLM-4.6":
+        # GLM-4.6 requires provider suffix in model string for API calls
+        return "zai-org/GLM-4.6:zai-org"
+
+    elif model_id == "MiniMaxAI/MiniMax-M2":
+        # MiniMax M2 needs Novita provider suffix
+        return "MiniMaxAI/MiniMax-M2:novita"
+
+    elif model_id == "moonshotai/Kimi-K2-Thinking":
+        # Kimi K2 Thinking needs Novita provider
+        return "moonshotai/Kimi-K2-Thinking:novita"
+
+    elif model_id == "moonshotai/Kimi-K2-Instruct":
+        # Kimi K2 Instruct needs Groq provider
+        return "moonshotai/Kimi-K2-Instruct:groq"
+
+    elif model_id.startswith("deepseek-ai/DeepSeek-V3"):
+        # DeepSeek V3 models need Novita provider
+        return f"{model_id}:novita"
+
+    elif model_id == "zai-org/GLM-4.5":
+        # GLM-4.5 needs fireworks-ai provider
+        return "zai-org/GLM-4.5:fireworks-ai"
+
+    return model_id
+
+
+def create_gemini3_messages(messages: list) -> tuple:
+    """
+    Convert OpenAI-style messages to Gemini 3 format.
+    Returns (contents, tools, config)
+    """
+    if not GEMINI_AVAILABLE:
+        raise ImportError("google-genai package required for Gemini 3")
+
+    contents = []
+    system_prompt = None
+
+    for msg in messages:
+        if msg['role'] == 'system':
+            system_prompt = msg['content']
+        elif msg['role'] in ['user', 'assistant']:
+            contents.append(
+                types.Content(
+                    role="user" if msg['role'] == 'user' else "model",
+                    parts=[types.Part.from_text(text=msg['content'])]
+                )
+            )
+
+    # Add system prompt as first user message if exists
+    if system_prompt:
+        contents.insert(0, types.Content(
+            role="user",
+            parts=[types.Part.from_text(text=f"System instructions: {system_prompt}")]
+        ))
+
+    # Configure tools and thinking
+    tools = [types.Tool(googleSearch=types.GoogleSearch())]
+    config = types.GenerateContentConfig(
+        thinkingConfig=types.ThinkingConfig(thinkingLevel="HIGH"),
+        tools=tools,
+        max_output_tokens=16384
+    )
+
+    return contents, config
+
+
+def is_native_sdk_model(model_id: str) -> bool:
+    """Check if model uses native SDK (not OpenAI-compatible)"""
+    return model_id in ["gemini-3-pro-preview"]
+
+
+def is_mistral_model(model_id: str) -> bool:
+    """Check if model uses Mistral SDK"""
+    return model_id in ["codestral-2508", "mistral-medium-2508"]
+