akhaliq (HF Staff) committed
Commit 0498411 · 1 Parent(s): 2948440

update gemini

Files changed (4)
  1. anycoder_app/deploy.py +145 -64
  2. anycoder_app/models.py +2 -1
  3. backend_api.py +76 -71
  4. backend_models.py +337 -0
anycoder_app/deploy.py CHANGED
@@ -97,6 +97,44 @@ def generation_code(query: Optional[str], _setting: Dict[str, str], _history: Op
         yield (error_message, _history or [], history_to_chatbot_messages(_history or []))
         return
 
+    # CRITICAL: Catch any HuggingFace API errors for non-HF models like Gemini 3
+    try:
+        yield from _generation_code_impl(query, _setting, _history, _current_model, language, provider, profile, token, code_output, history_output, history)
+    except Exception as e:
+        import traceback
+        error_str = str(e)
+        if "Repository Not Found" in error_str and "inferenceProviderMapping" in error_str:
+            # This is a HuggingFace API error for a non-HF model
+            model_id = _current_model.get('id', 'unknown')
+
+            # Get full traceback to see where the call originated
+            tb = traceback.format_exc()
+            print(f"DEBUG: HuggingFace API error for model {model_id}")
+            print(f"DEBUG: Full traceback:\n{tb}")
+
+            error_message = f"""❌ Error: Attempted to validate model '{model_id}' against HuggingFace API, but this is not a HuggingFace model.
+
+This error should not occur. Please check the server logs for the full traceback.
+
+- Model: {model_id}
+- Error: {error_str}
+
+Try reloading the page and selecting the model again."""
+            if code_output is not None and history_output is not None:
+                yield {
+                    code_output: error_message,
+                    history_output: history_to_chatbot_messages(_history or []),
+                }
+            else:
+                yield (error_message, _history or [], history_to_chatbot_messages(_history or []))
+            return
+        else:
+            # Re-raise other errors
+            raise
+
+def _generation_code_impl(query: Optional[str], _setting: Dict[str, str], _history: Optional[History], _current_model: Dict, language: str = "html", provider: str = "auto", profile: Optional[gr.OAuthProfile] = None, token: Optional[gr.OAuthToken] = None, code_output=None, history_output=None, history=None):
+    """Internal implementation of generation_code"""
+
     if query is None:
         query = ''
     if _history is None:
@@ -138,11 +176,16 @@ def generation_code(query: Optional[str], _setting: Dict[str, str], _history: Op
 
     # If this is a modification request, try to apply search/replace first
     if has_existing_content and query.strip():
-        try:
-            # Use the current model to generate search/replace instructions
-            client = get_inference_client(_current_model['id'], provider)
-
-            system_prompt = """You are a code editor assistant. Given existing code and modification instructions, generate EXACT search/replace blocks.
+        # Skip search/replace for models that use native clients (non-OpenAI-compatible)
+        # These models need the full generation flow to work properly
+        native_client_models = ["gemini-3-pro-preview"]
+
+        if _current_model['id'] not in native_client_models:
+            try:
+                # Use the current model to generate search/replace instructions
+                client = get_inference_client(_current_model['id'], provider)
+
+                system_prompt = """You are a code editor assistant. Given existing code and modification instructions, generate EXACT search/replace blocks.
 
 CRITICAL REQUIREMENTS:
 1. Use EXACTLY these markers: <<<<<<< SEARCH, =======, >>>>>>> REPLACE
@@ -163,73 +206,73 @@ Example format:
 }
 >>>>>>> REPLACE"""
 
-            user_prompt = f"""Existing code:
+                user_prompt = f"""Existing code:
 {last_assistant_msg}
 Modification instructions:
 {query}
 
 Generate the exact search/replace blocks needed to make these changes."""
 
-            messages = [
-                {"role": "system", "content": system_prompt},
-                {"role": "user", "content": user_prompt}
-            ]
-
-            # Generate search/replace instructions
-            if _current_model.get('type') == 'openai':
-                response = client.chat.completions.create(
-                    model=get_real_model_id(_current_model['id']),
-                    messages=messages,
-                    max_tokens=4000,
-                    temperature=0.1
-                )
-                changes_text = response.choices[0].message.content
-            elif _current_model.get('type') == 'mistral':
-                response = client.chat.complete(
-                    model=get_real_model_id(_current_model['id']),
-                    messages=messages,
-                    max_tokens=4000,
-                    temperature=0.1
-                )
-                changes_text = response.choices[0].message.content
-            else:  # Hugging Face or other
-                completion = client.chat.completions.create(
-                    model=get_real_model_id(_current_model['id']),
-                    messages=messages,
-                    max_tokens=4000,
-                    temperature=0.1
-                )
-                changes_text = completion.choices[0].message.content
-
-            # Apply the search/replace changes
-            if language == "transformers.js" and ('=== index.html ===' in last_assistant_msg):
-                modified_content = apply_transformers_js_search_replace_changes(last_assistant_msg, changes_text)
-            else:
-                modified_content = apply_search_replace_changes(last_assistant_msg, changes_text)
-
-            # If changes were successfully applied, return the modified content
-            if modified_content != last_assistant_msg:
-                _history.append([query, modified_content])
-
-                # Generate deployment message instead of preview
-                deploy_message = f"""
-                <div style='padding: 1.5em; text-align: center; background: #f0f9ff; border: 2px solid #0ea5e9; border-radius: 10px; color: #0c4a6e;'>
-                    <h3 style='margin-top: 0; color: #0ea5e9;'>✅ Code Updated Successfully!</h3>
-                    <p style='margin: 0.5em 0; font-size: 1.1em;'>Your {language.upper()} code has been modified and is ready for deployment.</p>
-                    <p style='margin: 0.5em 0; font-weight: bold;'>👉 Use the Deploy button in the sidebar to publish your app!</p>
-                </div>
-                """
-
-                yield {
-                    code_output: modified_content,
-                    history: _history,
-                    history_output: history_to_chatbot_messages(_history),
-                }
-                return
-
-        except Exception as e:
-            print(f"Search/replace failed, falling back to normal generation: {e}")
-            # If search/replace fails, continue with normal generation
+                messages = [
+                    {"role": "system", "content": system_prompt},
+                    {"role": "user", "content": user_prompt}
+                ]
+
+                # Generate search/replace instructions
+                if _current_model.get('type') == 'openai':
+                    response = client.chat.completions.create(
+                        model=get_real_model_id(_current_model['id']),
+                        messages=messages,
+                        max_tokens=4000,
+                        temperature=0.1
+                    )
+                    changes_text = response.choices[0].message.content
+                elif _current_model.get('type') == 'mistral':
+                    response = client.chat.complete(
+                        model=get_real_model_id(_current_model['id']),
+                        messages=messages,
+                        max_tokens=4000,
+                        temperature=0.1
+                    )
+                    changes_text = response.choices[0].message.content
+                else:  # Hugging Face or other
+                    completion = client.chat.completions.create(
+                        model=get_real_model_id(_current_model['id']),
+                        messages=messages,
+                        max_tokens=4000,
+                        temperature=0.1
+                    )
+                    changes_text = completion.choices[0].message.content
+
+                # Apply the search/replace changes
+                if language == "transformers.js" and ('=== index.html ===' in last_assistant_msg):
+                    modified_content = apply_transformers_js_search_replace_changes(last_assistant_msg, changes_text)
+                else:
+                    modified_content = apply_search_replace_changes(last_assistant_msg, changes_text)
+
+                # If changes were successfully applied, return the modified content
+                if modified_content != last_assistant_msg:
+                    _history.append([query, modified_content])
+
+                    # Generate deployment message instead of preview
+                    deploy_message = f"""
+                    <div style='padding: 1.5em; text-align: center; background: #f0f9ff; border: 2px solid #0ea5e9; border-radius: 10px; color: #0c4a6e;'>
+                        <h3 style='margin-top: 0; color: #0ea5e9;'>✅ Code Updated Successfully!</h3>
+                        <p style='margin: 0.5em 0; font-size: 1.1em;'>Your {language.upper()} code has been modified and is ready for deployment.</p>
+                        <p style='margin: 0.5em 0; font-weight: bold;'>👉 Use the Deploy button in the sidebar to publish your app!</p>
+                    </div>
+                    """
+
+                    yield {
+                        code_output: modified_content,
+                        history: _history,
+                        history_output: history_to_chatbot_messages(_history),
+                    }
+                    return
+
+            except Exception as e:
+                print(f"Search/replace failed, falling back to normal generation: {e}")
+                # If search/replace fails, continue with normal generation
 
     # Create/lookup a session id for temp-file tracking and cleanup
     if _setting is not None and isinstance(_setting, dict):
@@ -415,7 +458,7 @@ Generate the exact search/replace blocks needed to make these changes."""
         }
         return
 
-    # Use dynamic client based on selected model
+    # Use dynamic client based on selected model
     client = get_inference_client(_current_model["id"], provider)
 
     messages.append({'role': 'user', 'content': enhanced_query})
@@ -2293,6 +2336,25 @@ def _fetch_inference_provider_code(model_id: str) -> Optional[str]:
     Returns:
         The code snippet if model has inference providers, None otherwise
     """
+    # Skip non-HuggingFace models (external APIs)
+    non_hf_models = [
+        "gemini-3-pro-preview", "gemini-2.5-flash", "gemini-2.5-pro",
+        "gemini-flash-latest", "gemini-flash-lite-latest",
+        "gpt-5", "gpt-5.1", "gpt-5.1-instant", "gpt-5.1-codex", "gpt-5.1-codex-mini",
+        "grok-4", "Grok-Code-Fast-1",
+        "claude-opus-4.1", "claude-sonnet-4.5", "claude-haiku-4.5",
+        "qwen3-30b-a3b-instruct-2507", "qwen3-30b-a3b-thinking-2507",
+        "qwen3-coder-30b-a3b-instruct", "qwen3-max-preview",
+        "kimi-k2-turbo-preview", "step-3",
+        "codestral-2508", "mistral-medium-2508",
+        "stealth-model-1",
+        "openrouter/sonoma-dusk-alpha", "openrouter/sonoma-sky-alpha",
+        "openrouter/sherlock-dash-alpha", "openrouter/sherlock-think-alpha"
+    ]
+
+    if model_id in non_hf_models:
+        return None
+
     try:
         # Fetch trending models data from HuggingFace API
         response = requests.get("https://huggingface.co/api/trending", timeout=10)
@@ -2378,6 +2440,25 @@ def import_model_from_hf(model_id: str, prefer_local: bool = False) -> Tuple[str
     if not model_id or model_id == "":
         return "Please select a model.", "", "python", ""
 
+    # Skip non-HuggingFace models (external APIs) - these are not importable
+    non_hf_models = [
+        "gemini-3-pro-preview", "gemini-2.5-flash", "gemini-2.5-pro",
+        "gemini-flash-latest", "gemini-flash-lite-latest",
+        "gpt-5", "gpt-5.1", "gpt-5.1-instant", "gpt-5.1-codex", "gpt-5.1-codex-mini",
+        "grok-4", "Grok-Code-Fast-1",
+        "claude-opus-4.1", "claude-sonnet-4.5", "claude-haiku-4.5",
+        "qwen3-30b-a3b-instruct-2507", "qwen3-30b-a3b-thinking-2507",
+        "qwen3-coder-30b-a3b-instruct", "qwen3-max-preview",
+        "kimi-k2-turbo-preview", "step-3",
+        "codestral-2508", "mistral-medium-2508",
+        "stealth-model-1",
+        "openrouter/sonoma-dusk-alpha", "openrouter/sonoma-sky-alpha",
+        "openrouter/sherlock-dash-alpha", "openrouter/sherlock-think-alpha"
+    ]
+
+    if model_id in non_hf_models:
+        return f"❌ `{model_id}` is not a HuggingFace model and cannot be imported. This model is accessed via external API.", "", "python", ""
+
     # Build model URL
     model_url = f"https://huggingface.co/{model_id}"
 
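Reviewer note: the system prompt above pins the edit protocol to literal <<<<<<< SEARCH / ======= / >>>>>>> REPLACE markers. For readers who have not seen apply_search_replace_changes (its implementation is outside this diff), a minimal sketch of how blocks in that format can be parsed and applied is shown below. The function is illustrative only and is not the repo's implementation.

# Illustrative sketch only -- not the implementation in anycoder_app.
# Parses <<<<<<< SEARCH / ======= / >>>>>>> REPLACE blocks and applies
# each one to the original text, replacing the first occurrence.
def apply_search_replace_sketch(original: str, changes_text: str) -> str:
    result = original
    for block in changes_text.split("<<<<<<< SEARCH")[1:]:
        body, _, _ = block.partition(">>>>>>> REPLACE")
        if "=======" not in body:
            continue  # malformed block, skip it
        search_part, _, replace_part = body.partition("=======")
        search_text = search_part.strip("\n")
        replace_text = replace_part.strip("\n")
        if search_text and search_text in result:
            result = result.replace(search_text, replace_text, 1)
    return result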
 
anycoder_app/models.py CHANGED
@@ -23,9 +23,10 @@ Messages = List[Dict[str, str]]
 def get_inference_client(model_id, provider="auto"):
     """Return an InferenceClient with provider based on model_id and user selection."""
     if model_id == "gemini-3-pro-preview":
-        # Use native Google GenAI client for Gemini 3 Pro Preview
+        # Use native Google GenAI client for Gemini 3 Pro Preview with v1alpha API
         return genai.Client(
             api_key=os.getenv("GEMINI_API_KEY"),
+            http_options={'api_version': 'v1alpha'}
         )
     elif model_id == "qwen3-30b-a3b-instruct-2507":
         # Use DashScope OpenAI client
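Reviewer note: the only functional change here is http_options={'api_version': 'v1alpha'}, which routes Gemini 3 Pro Preview calls to the v1alpha endpoint that accepts thinking-level configuration. Below is a minimal smoke-test sketch, reusing the same google-genai calls that backend_models.py makes; the prompt string is a placeholder.

import os
from google import genai
from google.genai import types

# Sketch: construct the v1alpha client exactly as get_inference_client does
client = genai.Client(
    api_key=os.getenv("GEMINI_API_KEY"),
    http_options={'api_version': 'v1alpha'}
)

# One non-streaming call with the same thinking config used in backend_models.py
response = client.models.generate_content(
    model="gemini-3-pro-preview",
    contents="Say hello in one word.",  # placeholder prompt
    config=types.GenerateContentConfig(
        thinkingConfig=types.ThinkingConfig(thinkingLevel="HIGH")
    ),
)
print(response.text)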
backend_api.py CHANGED
@@ -19,6 +19,15 @@ import os
 from huggingface_hub import InferenceClient
 import httpx
 
+# Import model handling from backend_models
+from backend_models import (
+    get_inference_client,
+    get_real_model_id,
+    create_gemini3_messages,
+    is_native_sdk_model,
+    is_mistral_model
+)
+
 # Import system prompts from standalone backend_prompts.py
 # No dependencies on Gradio or heavy libraries
 print("[Startup] Loading system prompts from backend_prompts...")
@@ -333,16 +342,20 @@ async def generate_code(
 
     async def event_stream() -> AsyncGenerator[str, None]:
         """Stream generated code chunks"""
+        # Use the model_id from outer scope
+        selected_model_id = model_id
+
         try:
             # Find the selected model
             selected_model = None
             for model in AVAILABLE_MODELS:
-                if model["id"] == model_id:
+                if model["id"] == selected_model_id:
                     selected_model = model
                     break
 
             if not selected_model:
                 selected_model = AVAILABLE_MODELS[0]
+                selected_model_id = selected_model["id"]
 
             # Track generated code
             generated_code = ""
@@ -360,62 +373,13 @@ async def generate_code(
 
             print(f"[Generate] Using {language} prompt for query: {query[:100]}...")
 
-            # Get the real model ID
-            actual_model_id = selected_model["id"]
-
-            # Determine which provider/API to use based on model ID
-            if actual_model_id.startswith("openrouter/"):
-                # OpenRouter models - use OpenAI client directly
-                from openai import OpenAI
-                api_key = os.getenv("OPENROUTER_API_KEY") or os.getenv("HF_TOKEN")
-                client = OpenAI(
-                    base_url="https://openrouter.ai/api/v1",
-                    api_key=api_key,
-                    default_headers={
-                        "HTTP-Referer": "https://huggingface.co/spaces/akhaliq/anycoder",
-                        "X-Title": "AnyCoder"
-                    }
-                )
-                print(f"[Generate] Using OpenRouter with model: {actual_model_id}")
-            elif actual_model_id == "MiniMaxAI/MiniMax-M2":
-                # MiniMax M2 via HuggingFace with Novita provider
-                hf_token = os.getenv("HF_TOKEN")
-                if not hf_token:
-                    error_data = json.dumps({
-                        "type": "error",
-                        "message": "HF_TOKEN environment variable not set. Please set it in your terminal.",
-                        "timestamp": datetime.now().isoformat()
-                    })
-                    yield f"data: {error_data}\n\n"
-                    return
-
-                # Use OpenAI client with HuggingFace router
-                from openai import OpenAI
-                client = OpenAI(
-                    base_url="https://router.huggingface.co/v1",
-                    api_key=hf_token,
-                    default_headers={
-                        "X-HF-Bill-To": "huggingface"
-                    }
-                )
-                # Add :novita suffix for the API call
-                actual_model_id = "MiniMaxAI/MiniMax-M2:novita"
-                print(f"[Generate] Using HuggingFace router for MiniMax M2")
-            elif actual_model_id.startswith("deepseek-ai/"):
-                # DeepSeek models via HuggingFace - use OpenAI client for better streaming
-                from openai import OpenAI
-                client = OpenAI(
-                    base_url="https://api-inference.huggingface.co/v1",
-                    api_key=os.getenv("HF_TOKEN")
-                )
-                print(f"[Generate] Using HuggingFace Inference API for DeepSeek")
-            elif actual_model_id == "qwen3-max-preview":
-                # Qwen via DashScope (would need separate implementation)
-                # For now, fall back to HF
-                client = InferenceClient(token=os.getenv("HF_TOKEN"))
-            else:
-                # Default: HuggingFace models
-                client = InferenceClient(token=os.getenv("HF_TOKEN"))
+            # Get the client using backend_models
+            print(f"[Generate] Getting client for model: {selected_model_id}")
+            client = get_inference_client(selected_model_id, provider)
+
+            # Get the real model ID with provider suffixes
+            actual_model_id = get_real_model_id(selected_model_id)
+            print(f"[Generate] Using model ID: {actual_model_id}")
 
             # Prepare messages
             messages = [
@@ -425,26 +389,67 @@ async def generate_code(
 
             # Stream the response
             try:
-                stream = client.chat.completions.create(
-                    model=actual_model_id,
-                    messages=messages,
-                    temperature=0.7,
-                    max_tokens=10000,
-                    stream=True
-                )
+                # Handle Gemini 3 Pro Preview with native SDK
+                if selected_model_id == "gemini-3-pro-preview":
+                    print("[Generate] Using Gemini 3 native SDK")
+                    contents, config = create_gemini3_messages(messages)
+
+                    stream = client.models.generate_content_stream(
+                        model="gemini-3-pro-preview",
+                        contents=contents,
+                        config=config,
+                    )
+
+                # Handle Mistral models with different API
+                elif is_mistral_model(selected_model_id):
+                    print("[Generate] Using Mistral SDK")
+                    stream = client.chat.stream(
+                        model=actual_model_id,
+                        messages=messages,
+                        max_tokens=10000
+                    )
+
+                # All other models use OpenAI-compatible API
+                else:
+                    stream = client.chat.completions.create(
+                        model=actual_model_id,
+                        messages=messages,
+                        temperature=0.7,
+                        max_tokens=10000,
+                        stream=True
+                    )
 
                 chunk_count = 0
                 print(f"[Generate] Starting to stream from {actual_model_id}...")
 
                 for chunk in stream:
-                    # Check if choices array has elements before accessing
-                    if (hasattr(chunk, 'choices') and
-                        chunk.choices and
-                        len(chunk.choices) > 0 and
-                        hasattr(chunk.choices[0], 'delta') and
-                        hasattr(chunk.choices[0].delta, 'content') and
-                        chunk.choices[0].delta.content):
-                        content = chunk.choices[0].delta.content
+                    # Handle different response formats
+                    chunk_content = None
+
+                    if selected_model_id == "gemini-3-pro-preview":
+                        # Gemini native SDK format: chunk.text
+                        if hasattr(chunk, 'text') and chunk.text:
+                            chunk_content = chunk.text
+                    elif is_mistral_model(selected_model_id):
+                        # Mistral format: chunk.data.choices[0].delta.content
+                        if (hasattr(chunk, "data") and chunk.data and
+                            hasattr(chunk.data, "choices") and chunk.data.choices and
+                            hasattr(chunk.data.choices[0], "delta") and
+                            hasattr(chunk.data.choices[0].delta, "content") and
+                            chunk.data.choices[0].delta.content is not None):
+                            chunk_content = chunk.data.choices[0].delta.content
+                    else:
+                        # OpenAI format: chunk.choices[0].delta.content
+                        if (hasattr(chunk, 'choices') and
+                            chunk.choices and
+                            len(chunk.choices) > 0 and
+                            hasattr(chunk.choices[0], 'delta') and
+                            hasattr(chunk.choices[0].delta, 'content') and
+                            chunk.choices[0].delta.content):
+                            chunk_content = chunk.choices[0].delta.content
+
+                    if chunk_content:
+                        content = chunk_content
                         generated_code += content
                         chunk_count += 1
 
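Reviewer note: the streaming loop now normalizes three chunk shapes (Gemini's chunk.text, Mistral's chunk.data.choices[0].delta.content, and the OpenAI-style chunk.choices[0].delta.content). A sketch of the same logic factored into a helper is shown below; extract_chunk_text is not part of this commit, just a compact restatement of the branch above, and it assumes is_mistral_model is imported from backend_models as in the new import block.

from typing import Optional

def extract_chunk_text(chunk, selected_model_id: str) -> Optional[str]:
    """Sketch: normalize a streaming chunk to text across the three client families."""
    if selected_model_id == "gemini-3-pro-preview":
        # Gemini native SDK puts the text directly on the chunk
        return getattr(chunk, "text", None)
    if is_mistral_model(selected_model_id):
        # Mistral SDK wraps an OpenAI-style payload in chunk.data
        data = getattr(chunk, "data", None)
        if data is not None and getattr(data, "choices", None):
            return data.choices[0].delta.content
        return None
    # OpenAI-compatible clients
    if getattr(chunk, "choices", None):
        return chunk.choices[0].delta.content
    return None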
 
backend_models.py ADDED
@@ -0,0 +1,337 @@
+"""
+Standalone model inference and client management for AnyCoder Backend API.
+No Gradio dependencies - works with FastAPI/backend only.
+"""
+import os
+from typing import Optional
+
+from openai import OpenAI
+from mistralai import Mistral
+
+# Import genai for Gemini 3
+try:
+    from google import genai
+    from google.genai import types
+    GEMINI_AVAILABLE = True
+except ImportError:
+    GEMINI_AVAILABLE = False
+    print("WARNING: google-genai not available, Gemini 3 will not work")
+
+def get_inference_client(model_id: str, provider: str = "auto"):
+    """
+    Return an appropriate client based on model_id.
+
+    For Gemini 3: Returns genai.Client (native Google SDK)
+    For others: Returns OpenAI-compatible client or raises error
+    """
+    if model_id == "gemini-3-pro-preview":
+        if not GEMINI_AVAILABLE:
+            raise ImportError("google-genai package required for Gemini 3. Install with: pip install google-genai")
+        # Use native Google GenAI client for Gemini 3 Pro Preview with v1alpha API
+        api_key = os.getenv("GEMINI_API_KEY")
+        if not api_key:
+            raise ValueError("GEMINI_API_KEY environment variable required for Gemini 3")
+        return genai.Client(
+            api_key=api_key,
+            http_options={'api_version': 'v1alpha'}
+        )
+
+    elif model_id == "qwen3-30b-a3b-instruct-2507":
+        # Use DashScope OpenAI client
+        return OpenAI(
+            api_key=os.getenv("DASHSCOPE_API_KEY"),
+            base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
+        )
+
+    elif model_id == "qwen3-30b-a3b-thinking-2507":
+        # Use DashScope OpenAI client for Thinking model
+        return OpenAI(
+            api_key=os.getenv("DASHSCOPE_API_KEY"),
+            base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
+        )
+
+    elif model_id == "qwen3-coder-30b-a3b-instruct":
+        # Use DashScope OpenAI client for Coder model
+        return OpenAI(
+            api_key=os.getenv("DASHSCOPE_API_KEY"),
+            base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
+        )
+
+    elif model_id == "gpt-5":
+        # Use Poe (OpenAI-compatible) client for GPT-5 model
+        return OpenAI(
+            api_key=os.getenv("POE_API_KEY"),
+            base_url="https://api.poe.com/v1"
+        )
+
+    elif model_id == "gpt-5.1":
+        # Use Poe (OpenAI-compatible) client for GPT-5.1 model
+        return OpenAI(
+            api_key=os.getenv("POE_API_KEY"),
+            base_url="https://api.poe.com/v1"
+        )
+
+    elif model_id == "gpt-5.1-instant":
+        # Use Poe (OpenAI-compatible) client for GPT-5.1 Instant model
+        return OpenAI(
+            api_key=os.getenv("POE_API_KEY"),
+            base_url="https://api.poe.com/v1"
+        )
+
+    elif model_id == "gpt-5.1-codex":
+        # Use Poe (OpenAI-compatible) client for GPT-5.1 Codex model
+        return OpenAI(
+            api_key=os.getenv("POE_API_KEY"),
+            base_url="https://api.poe.com/v1"
+        )
+
+    elif model_id == "gpt-5.1-codex-mini":
+        # Use Poe (OpenAI-compatible) client for GPT-5.1 Codex Mini model
+        return OpenAI(
+            api_key=os.getenv("POE_API_KEY"),
+            base_url="https://api.poe.com/v1"
+        )
+
+    elif model_id == "grok-4":
+        # Use Poe (OpenAI-compatible) client for Grok-4 model
+        return OpenAI(
+            api_key=os.getenv("POE_API_KEY"),
+            base_url="https://api.poe.com/v1"
+        )
+
+    elif model_id == "Grok-Code-Fast-1":
+        # Use Poe (OpenAI-compatible) client for Grok-Code-Fast-1 model
+        return OpenAI(
+            api_key=os.getenv("POE_API_KEY"),
+            base_url="https://api.poe.com/v1"
+        )
+
+    elif model_id == "claude-opus-4.1":
+        # Use Poe (OpenAI-compatible) client for Claude-Opus-4.1
+        return OpenAI(
+            api_key=os.getenv("POE_API_KEY"),
+            base_url="https://api.poe.com/v1"
+        )
+
+    elif model_id == "claude-sonnet-4.5":
+        # Use Poe (OpenAI-compatible) client for Claude-Sonnet-4.5
+        return OpenAI(
+            api_key=os.getenv("POE_API_KEY"),
+            base_url="https://api.poe.com/v1"
+        )
+
+    elif model_id == "claude-haiku-4.5":
+        # Use Poe (OpenAI-compatible) client for Claude-Haiku-4.5
+        return OpenAI(
+            api_key=os.getenv("POE_API_KEY"),
+            base_url="https://api.poe.com/v1"
+        )
+
+    elif model_id == "qwen3-max-preview":
+        # Use DashScope International OpenAI client for Qwen3 Max Preview
+        return OpenAI(
+            api_key=os.getenv("DASHSCOPE_API_KEY"),
+            base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
+        )
+
+    elif model_id.startswith("openrouter/"):
+        # OpenRouter models
+        return OpenAI(
+            api_key=os.getenv("OPENROUTER_API_KEY"),
+            base_url="https://openrouter.ai/api/v1",
+        )
+
+    elif model_id == "MiniMaxAI/MiniMax-M2":
+        # Use HuggingFace Router with Novita provider for MiniMax M2 model
+        return OpenAI(
+            base_url="https://router.huggingface.co/v1",
+            api_key=os.getenv("HF_TOKEN"),
+            default_headers={"X-HF-Bill-To": "huggingface"}
+        )
+
+    elif model_id == "step-3":
+        # Use StepFun API client for Step-3 model
+        return OpenAI(
+            api_key=os.getenv("STEP_API_KEY"),
+            base_url="https://api.stepfun.com/v1"
+        )
+
+    elif model_id == "codestral-2508" or model_id == "mistral-medium-2508":
+        # Use Mistral client for Mistral models
+        return Mistral(api_key=os.getenv("MISTRAL_API_KEY"))
+
+    elif model_id == "gemini-2.5-flash":
+        # Use Google Gemini (OpenAI-compatible) client
+        return OpenAI(
+            api_key=os.getenv("GEMINI_API_KEY"),
+            base_url="https://generativelanguage.googleapis.com/v1beta/openai/",
+        )
+
+    elif model_id == "gemini-2.5-pro":
+        # Use Google Gemini Pro (OpenAI-compatible) client
+        return OpenAI(
+            api_key=os.getenv("GEMINI_API_KEY"),
+            base_url="https://generativelanguage.googleapis.com/v1beta/openai/",
+        )
+
+    elif model_id == "gemini-flash-latest":
+        # Use Google Gemini Flash Latest (OpenAI-compatible) client
+        return OpenAI(
+            api_key=os.getenv("GEMINI_API_KEY"),
+            base_url="https://generativelanguage.googleapis.com/v1beta/openai/",
+        )
+
+    elif model_id == "gemini-flash-lite-latest":
+        # Use Google Gemini Flash Lite Latest (OpenAI-compatible) client
+        return OpenAI(
+            api_key=os.getenv("GEMINI_API_KEY"),
+            base_url="https://generativelanguage.googleapis.com/v1beta/openai/",
+        )
+
+    elif model_id == "kimi-k2-turbo-preview":
+        # Use Moonshot AI (OpenAI-compatible) client for Kimi K2 Turbo (Preview)
+        return OpenAI(
+            api_key=os.getenv("MOONSHOT_API_KEY"),
+            base_url="https://api.moonshot.ai/v1",
+        )
+
+    elif model_id == "moonshotai/Kimi-K2-Thinking":
+        # Use HuggingFace Router with Novita provider
+        return OpenAI(
+            base_url="https://router.huggingface.co/v1",
+            api_key=os.getenv("HF_TOKEN"),
+            default_headers={"X-HF-Bill-To": "huggingface"}
+        )
+
+    elif model_id == "moonshotai/Kimi-K2-Instruct":
+        # Use HuggingFace Router with Groq provider
+        return OpenAI(
+            base_url="https://router.huggingface.co/v1",
+            api_key=os.getenv("HF_TOKEN"),
+            default_headers={"X-HF-Bill-To": "huggingface"}
+        )
+
+    elif model_id.startswith("deepseek-ai/"):
+        # DeepSeek models via HuggingFace Router with Novita provider
+        return OpenAI(
+            base_url="https://router.huggingface.co/v1",
+            api_key=os.getenv("HF_TOKEN"),
+            default_headers={"X-HF-Bill-To": "huggingface"}
+        )
+
+    elif model_id.startswith("zai-org/GLM-4"):
+        # GLM models via HuggingFace Router
+        return OpenAI(
+            base_url="https://router.huggingface.co/v1",
+            api_key=os.getenv("HF_TOKEN"),
+            default_headers={"X-HF-Bill-To": "huggingface"}
+        )
+
+    elif model_id == "stealth-model-1":
+        # Use stealth model with generic configuration
+        api_key = os.getenv("STEALTH_MODEL_1_API_KEY")
+        if not api_key:
+            raise ValueError("STEALTH_MODEL_1_API_KEY environment variable is required")
+
+        base_url = os.getenv("STEALTH_MODEL_1_BASE_URL")
+        if not base_url:
+            raise ValueError("STEALTH_MODEL_1_BASE_URL environment variable is required")
+
+        return OpenAI(
+            api_key=api_key,
+            base_url=base_url,
+        )
+
+    else:
+        # Unknown model - try HuggingFace Inference API
+        return OpenAI(
+            base_url="https://api-inference.huggingface.co/v1",
+            api_key=os.getenv("HF_TOKEN")
+        )
+
+
+def get_real_model_id(model_id: str) -> str:
+    """Get the real model ID with provider suffixes if needed"""
+    if model_id == "stealth-model-1":
+        # Get the real model ID from environment variable
+        real_model_id = os.getenv("STEALTH_MODEL_1_ID")
+        if not real_model_id:
+            raise ValueError("STEALTH_MODEL_1_ID environment variable is required")
+        return real_model_id
+
+    elif model_id == "zai-org/GLM-4.6":
+        # GLM-4.6 requires provider suffix in model string for API calls
+        return "zai-org/GLM-4.6:zai-org"
+
+    elif model_id == "MiniMaxAI/MiniMax-M2":
+        # MiniMax M2 needs Novita provider suffix
+        return "MiniMaxAI/MiniMax-M2:novita"
+
+    elif model_id == "moonshotai/Kimi-K2-Thinking":
+        # Kimi K2 Thinking needs Novita provider
+        return "moonshotai/Kimi-K2-Thinking:novita"
+
+    elif model_id == "moonshotai/Kimi-K2-Instruct":
+        # Kimi K2 Instruct needs Groq provider
+        return "moonshotai/Kimi-K2-Instruct:groq"
+
+    elif model_id.startswith("deepseek-ai/DeepSeek-V3"):
+        # DeepSeek V3 models need Novita provider
+        return f"{model_id}:novita"
+
+    elif model_id == "zai-org/GLM-4.5":
+        # GLM-4.5 needs fireworks-ai provider
+        return "zai-org/GLM-4.5:fireworks-ai"
+
+    return model_id
+
+
+def create_gemini3_messages(messages: list) -> tuple:
+    """
+    Convert OpenAI-style messages to Gemini 3 format.
+    Returns (contents, tools, config)
+    """
+    if not GEMINI_AVAILABLE:
+        raise ImportError("google-genai package required for Gemini 3")
+
+    contents = []
+    system_prompt = None
+
+    for msg in messages:
+        if msg['role'] == 'system':
+            system_prompt = msg['content']
+        elif msg['role'] in ['user', 'assistant']:
+            contents.append(
+                types.Content(
+                    role="user" if msg['role'] == 'user' else "model",
+                    parts=[types.Part.from_text(text=msg['content'])]
+                )
+            )
+
+    # Add system prompt as first user message if exists
+    if system_prompt:
+        contents.insert(0, types.Content(
+            role="user",
+            parts=[types.Part.from_text(text=f"System instructions: {system_prompt}")]
+        ))
+
+    # Configure tools and thinking
+    tools = [types.Tool(googleSearch=types.GoogleSearch())]
+    config = types.GenerateContentConfig(
+        thinkingConfig=types.ThinkingConfig(thinkingLevel="HIGH"),
+        tools=tools,
+        max_output_tokens=16384
+    )
+
+    return contents, config
+
+
+def is_native_sdk_model(model_id: str) -> bool:
+    """Check if model uses native SDK (not OpenAI-compatible)"""
+    return model_id in ["gemini-3-pro-preview"]
+
+
+def is_mistral_model(model_id: str) -> bool:
+    """Check if model uses Mistral SDK"""
+    return model_id in ["codestral-2508", "mistral-medium-2508"]
+