|
|
import os |
|
|
from huggingface_hub import InferenceClient |
|
|
from openai import OpenAI |
|
|
from mistralai import Mistral |
|
|
|
|
|
# Catalog of user-selectable models. Each entry carries a human-readable
# display name, the backend-specific model id, and a short description.
AVAILABLE_MODELS = [
    {
        "name": "Moonshot Kimi-K2",
        "id": "moonshotai/Kimi-K2-Instruct",
        "description": "Moonshot AI Kimi-K2-Instruct model for code generation and general tasks",
    },
    {
        "name": "Kimi K2 Turbo (Preview)",
        "id": "kimi-k2-turbo-preview",
        "description": "Moonshot AI Kimi K2 Turbo via OpenAI-compatible API",
    },
    {
        "name": "DeepSeek V3",
        "id": "deepseek-ai/DeepSeek-V3-0324",
        "description": "DeepSeek V3 model for code generation",
    },
    {
        "name": "DeepSeek V3.1",
        "id": "deepseek-ai/DeepSeek-V3.1",
        "description": "DeepSeek V3.1 model for code generation and general tasks",
    },
    {
        "name": "DeepSeek R1",
        "id": "deepseek-ai/DeepSeek-R1-0528",
        "description": "DeepSeek R1 model for code generation",
    },
    {
        "name": "ERNIE-4.5-VL",
        "id": "baidu/ERNIE-4.5-VL-424B-A47B-Base-PT",
        "description": "ERNIE-4.5-VL model for multimodal code generation with image support",
    },
    {
        "name": "MiniMax M1",
        "id": "MiniMaxAI/MiniMax-M1-80k",
        "description": "MiniMax M1 model for code generation and general tasks",
    },
    {
        "name": "Qwen3-235B-A22B",
        "id": "Qwen/Qwen3-235B-A22B",
        "description": "Qwen3-235B-A22B model for code generation and general tasks",
    },
    {
        "name": "SmolLM3-3B",
        "id": "HuggingFaceTB/SmolLM3-3B",
        "description": "SmolLM3-3B model for code generation and general tasks",
    },
    {
        "name": "GLM-4.5",
        "id": "zai-org/GLM-4.5",
        "description": "GLM-4.5 model with thinking capabilities for advanced code generation",
    },
    {
        "name": "GLM-4.5V",
        "id": "zai-org/GLM-4.5V",
        "description": "GLM-4.5V multimodal model with image understanding for code generation",
    },
    {
        "name": "GLM-4.1V-9B-Thinking",
        "id": "THUDM/GLM-4.1V-9B-Thinking",
        "description": "GLM-4.1V-9B-Thinking model for multimodal code generation with image support",
    },
    {
        "name": "Qwen3-235B-A22B-Instruct-2507",
        "id": "Qwen/Qwen3-235B-A22B-Instruct-2507",
        "description": "Qwen3-235B-A22B-Instruct-2507 model for code generation and general tasks",
    },
    {
        "name": "Qwen3-Coder-480B-A35B-Instruct",
        "id": "Qwen/Qwen3-Coder-480B-A35B-Instruct",
        "description": "Qwen3-Coder-480B-A35B-Instruct model for advanced code generation and programming tasks",
    },
    {
        "name": "Qwen3-32B",
        "id": "Qwen/Qwen3-32B",
        "description": "Qwen3-32B model for code generation and general tasks",
    },
    {
        "name": "Qwen3-4B-Instruct-2507",
        "id": "Qwen/Qwen3-4B-Instruct-2507",
        "description": "Qwen3-4B-Instruct-2507 model for code generation and general tasks",
    },
    {
        "name": "Qwen3-4B-Thinking-2507",
        "id": "Qwen/Qwen3-4B-Thinking-2507",
        "description": "Qwen3-4B-Thinking-2507 model with advanced reasoning capabilities for code generation and general tasks",
    },
    {
        "name": "Qwen3-235B-A22B-Thinking",
        "id": "Qwen/Qwen3-235B-A22B-Thinking-2507",
        "description": "Qwen3-235B-A22B-Thinking model with advanced reasoning capabilities",
    },
    {
        "name": "Qwen3-30B-A3B-Instruct-2507",
        "id": "qwen3-30b-a3b-instruct-2507",
        "description": "Qwen3-30B-A3B-Instruct model via Alibaba Cloud DashScope API",
    },
    {
        "name": "Qwen3-30B-A3B-Thinking-2507",
        "id": "qwen3-30b-a3b-thinking-2507",
        "description": "Qwen3-30B-A3B-Thinking model with advanced reasoning via Alibaba Cloud DashScope API",
    },
    {
        "name": "Qwen3-Coder-30B-A3B-Instruct",
        "id": "qwen3-coder-30b-a3b-instruct",
        "description": "Qwen3-Coder-30B-A3B-Instruct model for advanced code generation via Alibaba Cloud DashScope API",
    },
    {
        "name": "Cohere Command-A Reasoning 08-2025",
        "id": "CohereLabs/command-a-reasoning-08-2025",
        "description": "Cohere Labs Command-A Reasoning (Aug 2025) via Hugging Face InferenceClient",
    },
    {
        "name": "StepFun Step-3",
        "id": "step-3",
        "description": "StepFun Step-3 model - AI chat assistant by 阶跃星辰 with multilingual capabilities",
    },
    {
        "name": "Codestral 2508",
        "id": "codestral-2508",
        "description": "Mistral Codestral model - specialized for code generation and programming tasks",
    },
    {
        "name": "Mistral Medium 2508",
        "id": "mistral-medium-2508",
        "description": "Mistral Medium 2508 model via Mistral API for general tasks and coding",
    },
    {
        "name": "Gemini 2.5 Flash",
        "id": "gemini-2.5-flash",
        "description": "Google Gemini 2.5 Flash via OpenAI-compatible API",
    },
    {
        "name": "Gemini 2.5 Pro",
        "id": "gemini-2.5-pro",
        "description": "Google Gemini 2.5 Pro via OpenAI-compatible API",
    },
    {
        "name": "GPT-OSS-120B",
        "id": "openai/gpt-oss-120b",
        "description": "OpenAI GPT-OSS-120B model for advanced code generation and general tasks",
    },
    {
        "name": "GPT-OSS-20B",
        "id": "openai/gpt-oss-20b",
        "description": "OpenAI GPT-OSS-20B model for code generation and general tasks",
    },
    {
        "name": "GPT-5",
        "id": "gpt-5",
        "description": "OpenAI GPT-5 model for advanced code generation and general tasks",
    },
    {
        "name": "Grok-4",
        "id": "grok-4",
        "description": "Grok-4 model via Poe (OpenAI-compatible) for advanced tasks",
    },
    {
        "name": "Claude-Opus-4.1",
        "id": "claude-opus-4.1",
        "description": "Anthropic Claude Opus 4.1 via Poe (OpenAI-compatible)",
    },
]
|
|
|
|
|
|
|
|
# Default model selection: look up the preferred display name in the catalog,
# falling back to the first catalog entry when the name is absent, or None when
# the catalog itself is empty. Using next() with a generator avoids the manual
# loop/break and keeps the throwaway loop variable out of the module namespace.
DEFAULT_MODEL_NAME = "Qwen3-Coder-480B-A35B-Instruct"
DEFAULT_MODEL = next(
    (_m for _m in AVAILABLE_MODELS if _m.get("name") == DEFAULT_MODEL_NAME),
    AVAILABLE_MODELS[0] if AVAILABLE_MODELS else None,
)
|
|
|
|
|
|
|
|
# Hugging Face API token is mandatory for the InferenceClient fallback path:
# fail fast at import time rather than surfacing an auth error mid-request.
HF_TOKEN = os.getenv('HF_TOKEN')
if HF_TOKEN is None or HF_TOKEN == "":
    raise RuntimeError("HF_TOKEN environment variable is not set. Please set it to your Hugging Face API token.")
|
|
|
|
|
# Models served through OpenAI-compatible REST endpoints, keyed by model id.
# Each value is (name of environment variable holding the API key, base URL).
_OPENAI_COMPATIBLE_ENDPOINTS = {
    # Alibaba Cloud DashScope
    "qwen3-30b-a3b-instruct-2507": ("DASHSCOPE_API_KEY", "https://dashscope.aliyuncs.com/compatible-mode/v1"),
    "qwen3-30b-a3b-thinking-2507": ("DASHSCOPE_API_KEY", "https://dashscope.aliyuncs.com/compatible-mode/v1"),
    "qwen3-coder-30b-a3b-instruct": ("DASHSCOPE_API_KEY", "https://dashscope.aliyuncs.com/compatible-mode/v1"),
    # Poe
    "gpt-5": ("POE_API_KEY", "https://api.poe.com/v1"),
    "grok-4": ("POE_API_KEY", "https://api.poe.com/v1"),
    "claude-opus-4.1": ("POE_API_KEY", "https://api.poe.com/v1"),
    # StepFun
    "step-3": ("STEP_API_KEY", "https://api.stepfun.com/v1"),
    # Google Gemini
    "gemini-2.5-flash": ("GEMINI_API_KEY", "https://generativelanguage.googleapis.com/v1beta/openai/"),
    "gemini-2.5-pro": ("GEMINI_API_KEY", "https://generativelanguage.googleapis.com/v1beta/openai/"),
    # Moonshot
    "kimi-k2-turbo-preview": ("MOONSHOT_API_KEY", "https://api.moonshot.ai/v1"),
}

# Models served through the native Mistral SDK.
_MISTRAL_MODELS = {"codestral-2508", "mistral-medium-2508"}

# Hugging Face inference-provider overrides for specific hub model ids.
# Model ids not listed here keep the caller-supplied provider (default "auto").
_HF_PROVIDER_OVERRIDES = {
    "openai/gpt-oss-120b": "groq",
    "openai/gpt-oss-20b": "groq",
    "moonshotai/Kimi-K2-Instruct": "groq",
    "Qwen/Qwen3-235B-A22B": "cerebras",
    "Qwen/Qwen3-235B-A22B-Instruct-2507": "cerebras",
    "Qwen/Qwen3-32B": "cerebras",
    "Qwen/Qwen3-235B-A22B-Thinking-2507": "cerebras",
    "Qwen/Qwen3-Coder-480B-A35B-Instruct": "cerebras",
    "deepseek-ai/DeepSeek-V3.1": "novita",
    "zai-org/GLM-4.5": "fireworks-ai",
}


def get_inference_client(model_id, provider="auto"):
    """Return an inference client appropriate for ``model_id``.

    Routing (in order):
      1. Ids in ``_OPENAI_COMPATIBLE_ENDPOINTS`` get an ``OpenAI`` client whose
         API key is read from the mapped environment variable.
      2. Ids in ``_MISTRAL_MODELS`` get a native ``Mistral`` client
         (``MISTRAL_API_KEY``).
      3. Everything else gets a Hugging Face ``InferenceClient`` authenticated
         with the module-level ``HF_TOKEN``; ids in ``_HF_PROVIDER_OVERRIDES``
         silently override the ``provider`` argument.

    Args:
        model_id: Backend model identifier (see ``AVAILABLE_MODELS``).
        provider: Hugging Face inference provider; only used on the
            ``InferenceClient`` path and ignored for ids with an override.

    Returns:
        An ``OpenAI``, ``Mistral``, or ``InferenceClient`` instance.
    """
    endpoint = _OPENAI_COMPATIBLE_ENDPOINTS.get(model_id)
    if endpoint is not None:
        key_env, base_url = endpoint
        return OpenAI(api_key=os.getenv(key_env), base_url=base_url)
    if model_id in _MISTRAL_MODELS:
        return Mistral(api_key=os.getenv("MISTRAL_API_KEY"))
    provider = _HF_PROVIDER_OVERRIDES.get(model_id, provider)
    return InferenceClient(
        provider=provider,
        api_key=HF_TOKEN,
        bill_to="huggingface"
    )