|
|
""" |
|
|
Standalone model inference and client management for AnyCoder Backend API. |
|
|
No Gradio dependencies - works with FastAPI/backend only. |
|
|
""" |
|
|
import os |
|
|
from typing import Optional |
|
|
|
|
|
from openai import OpenAI |
|
|
from mistralai import Mistral |
|
|
|
|
|
|
|
|
try: |
|
|
from google import genai |
|
|
from google.genai import types |
|
|
GEMINI_AVAILABLE = True |
|
|
except ImportError: |
|
|
GEMINI_AVAILABLE = False |
|
|
|
|
|
def get_inference_client(model_id: str, provider: str = "auto"):
    """
    Return an appropriate client based on model_id.

    Args:
        model_id: Identifier of the model to route to.
        provider: Currently unused; kept for backward compatibility
            with existing callers.

    Returns:
        An OpenAI-compatible client for most models, or a Mistral client
        for Mistral models. Unknown models fall back to the Hugging Face
        serverless inference endpoint.

    Raises:
        ValueError: If the stealth model is selected but its required
            environment variables are not configured.
    """
    def _openai_client(env_var: str, base_url: str):
        # All OpenAI-compatible endpoints share this construction; the
        # API key is read from the environment at call time.
        return OpenAI(api_key=os.getenv(env_var), base_url=base_url)

    def _hf_router_client():
        # Hugging Face router endpoint; requests are billed to the
        # "huggingface" org via the X-HF-Bill-To header.
        return OpenAI(
            base_url="https://router.huggingface.co/v1",
            api_key=os.getenv("HF_TOKEN"),
            default_headers={"X-HF-Bill-To": "huggingface"}
        )

    # Models served through Poe's OpenAI-compatible endpoint.
    poe_models = {
        "gemini-3.0-pro",
        "gpt-5.1",
        "gpt-5.1-instant",
        "gpt-5.1-codex",
        "gpt-5.1-codex-mini",
        "grok-4",
        "Grok-Code-Fast-1",
        "claude-opus-4.1",
        "claude-opus-4.5",
        "claude-sonnet-4.5",
        "claude-haiku-4.5",
    }

    # Qwen models served via Alibaba DashScope's compatible-mode endpoint.
    dashscope_models = {
        "qwen3-30b-a3b-instruct-2507",
        "qwen3-30b-a3b-thinking-2507",
        "qwen3-coder-30b-a3b-instruct",
    }

    # Gemini models served via Google's OpenAI-compatibility endpoint.
    gemini_models = {
        "gemini-2.5-flash",
        "gemini-2.5-pro",
        "gemini-flash-lite-latest",
    }

    # Models routed through the Hugging Face router (exact ids and prefixes).
    hf_router_models = {
        "MiniMaxAI/MiniMax-M2",
        "moonshotai/Kimi-K2-Thinking",
        "moonshotai/Kimi-K2-Instruct",
    }
    hf_router_prefixes = ("deepseek-ai/", "zai-org/GLM-4", "moonshotai/Kimi-K2")

    if model_id in poe_models:
        return _openai_client("POE_API_KEY", "https://api.poe.com/v1")

    if model_id in dashscope_models:
        return _openai_client(
            "DASHSCOPE_API_KEY",
            "https://dashscope.aliyuncs.com/compatible-mode/v1",
        )

    if model_id == "x-ai/grok-4.1-fast" or model_id.startswith("openrouter/"):
        return _openai_client("OPENROUTER_API_KEY", "https://openrouter.ai/api/v1")

    if model_id in hf_router_models or model_id.startswith(hf_router_prefixes):
        return _hf_router_client()

    if model_id == "step-3":
        return _openai_client("STEP_API_KEY", "https://api.stepfun.com/v1")

    if model_id in ("codestral-2508", "mistral-medium-2508"):
        # Mistral models use the native Mistral SDK, not an OpenAI client.
        return Mistral(api_key=os.getenv("MISTRAL_API_KEY"))

    if model_id in gemini_models:
        return _openai_client(
            "GEMINI_API_KEY",
            "https://generativelanguage.googleapis.com/v1beta/openai/",
        )

    if model_id == "kimi-k2-turbo-preview":
        return _openai_client("MOONSHOT_API_KEY", "https://api.moonshot.ai/v1")

    if model_id == "stealth-model-1":
        # Both credentials and endpoint for the stealth model come from
        # the environment; fail loudly if either is missing.
        api_key = os.getenv("STEALTH_MODEL_1_API_KEY")
        if not api_key:
            raise ValueError("STEALTH_MODEL_1_API_KEY environment variable is required")

        base_url = os.getenv("STEALTH_MODEL_1_BASE_URL")
        if not base_url:
            raise ValueError("STEALTH_MODEL_1_BASE_URL environment variable is required")

        return OpenAI(
            api_key=api_key,
            base_url=base_url,
        )

    # Default: Hugging Face serverless inference (OpenAI-compatible).
    return OpenAI(
        base_url="https://api-inference.huggingface.co/v1",
        api_key=os.getenv("HF_TOKEN")
    )
|
|
|
|
|
|
|
|
def get_real_model_id(model_id: str) -> str:
    """Get the real model ID with provider suffixes if needed"""
    if model_id == "stealth-model-1":
        # The stealth model's true identity is supplied via environment.
        resolved = os.getenv("STEALTH_MODEL_1_ID")
        if not resolved:
            raise ValueError("STEALTH_MODEL_1_ID environment variable is required")
        return resolved

    # Exact-match mapping of model ids to their provider-suffixed forms.
    provider_suffixed = {
        "zai-org/GLM-4.6": "zai-org/GLM-4.6:cerebras",
        "MiniMaxAI/MiniMax-M2": "MiniMaxAI/MiniMax-M2:novita",
        "moonshotai/Kimi-K2-Thinking": "moonshotai/Kimi-K2-Thinking:together",
        "moonshotai/Kimi-K2-Instruct": "moonshotai/Kimi-K2-Instruct:groq",
        "zai-org/GLM-4.5": "zai-org/GLM-4.5:fireworks-ai",
    }
    if model_id in provider_suffixed:
        return provider_suffixed[model_id]

    # The DeepSeek V3/R1 families are all served via the novita provider.
    if model_id.startswith(("deepseek-ai/DeepSeek-V3", "deepseek-ai/DeepSeek-R1")):
        return f"{model_id}:novita"

    # Everything else passes through unchanged.
    return model_id
|
|
|
|
|
|
|
|
def create_gemini3_messages(messages: list) -> tuple:
    """
    Convert OpenAI-style messages to Gemini 3 format.

    Args:
        messages: OpenAI-style message dicts, each with 'role'
            ('system', 'user', or 'assistant') and 'content' keys.

    Returns:
        (contents, config): a list of ``types.Content`` objects and a
        ``types.GenerateContentConfig`` with HIGH thinking level, a
        Google Search tool, and a 16384 output-token cap.

    Raises:
        ImportError: If the google-genai package is not installed.
    """
    if not GEMINI_AVAILABLE:
        raise ImportError("google-genai package required for Gemini 3")

    contents = []
    system_prompt = None

    for msg in messages:
        if msg['role'] == 'system':
            # Remember the last system message; it is prepended below.
            system_prompt = msg['content']
        elif msg['role'] in ['user', 'assistant']:
            # Gemini uses "model" where OpenAI uses "assistant".
            contents.append(
                types.Content(
                    role="user" if msg['role'] == 'user' else "model",
                    parts=[types.Part.from_text(text=msg['content'])]
                )
            )

    if system_prompt:
        # Gemini 3 contents have no dedicated system role here, so the
        # system prompt is injected as a leading user turn.
        contents.insert(0, types.Content(
            role="user",
            parts=[types.Part.from_text(text=f"System instructions: {system_prompt}")]
        ))

    # Enable Google Search grounding and high-effort thinking.
    tools = [types.Tool(googleSearch=types.GoogleSearch())]
    config = types.GenerateContentConfig(
        thinkingConfig=types.ThinkingConfig(thinkingLevel="HIGH"),
        tools=tools,
        max_output_tokens=16384
    )

    # NOTE: tools is not returned separately; it is embedded in config.
    return contents, config
|
|
|
|
|
|
|
|
def is_native_sdk_model(model_id: str) -> bool:
    """Check if model uses native SDK (not OpenAI-compatible)"""
    # Every supported model is currently reachable through an
    # OpenAI-compatible (or Mistral) client, so nothing takes the
    # native-SDK path. The parameter is kept for interface stability.
    return False
|
|
|
|
|
|
|
|
def is_mistral_model(model_id: str) -> bool:
    """Check if model uses Mistral SDK"""
    # Only these two models are served through the native Mistral client.
    mistral_ids = {"codestral-2508", "mistral-medium-2508"}
    return model_id in mistral_ids
|
|
|
|
|
|