File size: 10,107 Bytes
0730dbb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
import os
from huggingface_hub import InferenceClient
from openai import OpenAI
from mistralai import Mistral

# Registry of selectable models. Each entry holds:
#   name        - human-readable label (used for default-model lookup below)
#   id          - provider-specific model identifier passed to the client
#   description - short blurb describing the model
AVAILABLE_MODELS: list[dict[str, str]] = [
    {
        "name": "Moonshot Kimi-K2",
        "id": "moonshotai/Kimi-K2-Instruct",
        "description": "Moonshot AI Kimi-K2-Instruct model for code generation and general tasks"
    },
    {
        "name": "Kimi K2 Turbo (Preview)",
        "id": "kimi-k2-turbo-preview",
        "description": "Moonshot AI Kimi K2 Turbo via OpenAI-compatible API"
    },
    {
        "name": "DeepSeek V3",
        "id": "deepseek-ai/DeepSeek-V3-0324",
        "description": "DeepSeek V3 model for code generation"
    },
    {
        "name": "DeepSeek V3.1",
        "id": "deepseek-ai/DeepSeek-V3.1",
        "description": "DeepSeek V3.1 model for code generation and general tasks"
    },
    {
        "name": "DeepSeek R1",
        "id": "deepseek-ai/DeepSeek-R1-0528",
        "description": "DeepSeek R1 model for code generation"
    },
    {
        "name": "ERNIE-4.5-VL",
        "id": "baidu/ERNIE-4.5-VL-424B-A47B-Base-PT",
        "description": "ERNIE-4.5-VL model for multimodal code generation with image support"
    },
    {
        "name": "MiniMax M1",
        "id": "MiniMaxAI/MiniMax-M1-80k",
        "description": "MiniMax M1 model for code generation and general tasks"
    },
    {
        "name": "Qwen3-235B-A22B",
        "id": "Qwen/Qwen3-235B-A22B",
        "description": "Qwen3-235B-A22B model for code generation and general tasks"
    },
    {
        "name": "SmolLM3-3B",
        "id": "HuggingFaceTB/SmolLM3-3B",
        "description": "SmolLM3-3B model for code generation and general tasks"
    },
    {
        "name": "GLM-4.5",
        "id": "zai-org/GLM-4.5",
        "description": "GLM-4.5 model with thinking capabilities for advanced code generation"
    },
    {
        "name": "GLM-4.5V",
        "id": "zai-org/GLM-4.5V",
        "description": "GLM-4.5V multimodal model with image understanding for code generation"
    },
    {
        "name": "GLM-4.1V-9B-Thinking",
        "id": "THUDM/GLM-4.1V-9B-Thinking",
        "description": "GLM-4.1V-9B-Thinking model for multimodal code generation with image support"
    },
    {
        "name": "Qwen3-235B-A22B-Instruct-2507",
        "id": "Qwen/Qwen3-235B-A22B-Instruct-2507",
        "description": "Qwen3-235B-A22B-Instruct-2507 model for code generation and general tasks"
    },
    {
        "name": "Qwen3-Coder-480B-A35B-Instruct",
        "id": "Qwen/Qwen3-Coder-480B-A35B-Instruct",
        "description": "Qwen3-Coder-480B-A35B-Instruct model for advanced code generation and programming tasks"
    },
    {
        "name": "Qwen3-32B",
        "id": "Qwen/Qwen3-32B",
        "description": "Qwen3-32B model for code generation and general tasks"
    },
    {
        "name": "Qwen3-4B-Instruct-2507",
        "id": "Qwen/Qwen3-4B-Instruct-2507",
        "description": "Qwen3-4B-Instruct-2507 model for code generation and general tasks"
    },
    {
        "name": "Qwen3-4B-Thinking-2507",
        "id": "Qwen/Qwen3-4B-Thinking-2507",
        "description": "Qwen3-4B-Thinking-2507 model with advanced reasoning capabilities for code generation and general tasks"
    },
    {
        "name": "Qwen3-235B-A22B-Thinking",
        "id": "Qwen/Qwen3-235B-A22B-Thinking-2507",
        "description": "Qwen3-235B-A22B-Thinking model with advanced reasoning capabilities"
    },
    {
        "name": "Qwen3-30B-A3B-Instruct-2507",
        "id": "qwen3-30b-a3b-instruct-2507",
        "description": "Qwen3-30B-A3B-Instruct model via Alibaba Cloud DashScope API"
    },
    {
        "name": "Qwen3-30B-A3B-Thinking-2507",
        "id": "qwen3-30b-a3b-thinking-2507",
        "description": "Qwen3-30B-A3B-Thinking model with advanced reasoning via Alibaba Cloud DashScope API"
    },
    {
        "name": "Qwen3-Coder-30B-A3B-Instruct",
        "id": "qwen3-coder-30b-a3b-instruct",
        "description": "Qwen3-Coder-30B-A3B-Instruct model for advanced code generation via Alibaba Cloud DashScope API"
    },
    {
        "name": "Cohere Command-A Reasoning 08-2025",
        "id": "CohereLabs/command-a-reasoning-08-2025",
        "description": "Cohere Labs Command-A Reasoning (Aug 2025) via Hugging Face InferenceClient"
    },
    {
        "name": "StepFun Step-3",
        "id": "step-3",
        "description": "StepFun Step-3 model - AI chat assistant by 阶跃星辰 with multilingual capabilities"
    },
    {
        "name": "Codestral 2508",
        "id": "codestral-2508",
        "description": "Mistral Codestral model - specialized for code generation and programming tasks"
    },
    {
        "name": "Mistral Medium 2508",
        "id": "mistral-medium-2508",
        "description": "Mistral Medium 2508 model via Mistral API for general tasks and coding"
    },
    {
        "name": "Gemini 2.5 Flash",
        "id": "gemini-2.5-flash",
        "description": "Google Gemini 2.5 Flash via OpenAI-compatible API"
    },
    {
        "name": "Gemini 2.5 Pro",
        "id": "gemini-2.5-pro",
        "description": "Google Gemini 2.5 Pro via OpenAI-compatible API"
    },
    {
        "name": "GPT-OSS-120B",
        "id": "openai/gpt-oss-120b",
        "description": "OpenAI GPT-OSS-120B model for advanced code generation and general tasks"
    },
    {
        "name": "GPT-OSS-20B",
        "id": "openai/gpt-oss-20b",
        "description": "OpenAI GPT-OSS-20B model for code generation and general tasks"
    },
    {
        "name": "GPT-5",
        "id": "gpt-5",
        "description": "OpenAI GPT-5 model for advanced code generation and general tasks"
    },
    {
        "name": "Grok-4",
        "id": "grok-4",
        "description": "Grok-4 model via Poe (OpenAI-compatible) for advanced tasks"
    },
    {
        "name": "Claude-Opus-4.1",
        "id": "claude-opus-4.1",
        "description": "Anthropic Claude Opus 4.1 via Poe (OpenAI-compatible)"
    }
]

# Default model selection: prefer the entry whose name matches
# DEFAULT_MODEL_NAME, otherwise fall back to the first registered model.
# DEFAULT_MODEL is None only if the registry itself is empty.
DEFAULT_MODEL_NAME = "Qwen3-Coder-480B-A35B-Instruct"
DEFAULT_MODEL = next(
    (model for model in AVAILABLE_MODELS if model.get("name") == DEFAULT_MODEL_NAME),
    AVAILABLE_MODELS[0] if AVAILABLE_MODELS else None,
)

# Hugging Face API token, required at import time: the InferenceClient
# fallback path in get_inference_client() authenticates with it.
HF_TOKEN = os.environ.get('HF_TOKEN')
if not HF_TOKEN:
    raise RuntimeError("HF_TOKEN environment variable is not set. Please set it to your Hugging Face API token.")

def get_inference_client(model_id, provider="auto"):
    """Return an API client appropriate for *model_id*.

    Depending on the model this is one of:
      - an ``OpenAI`` client pointed at an OpenAI-compatible third-party
        endpoint (DashScope, Poe, StepFun, Google Gemini, Moonshot AI),
      - a ``Mistral`` client for Mistral-hosted models,
      - a Hugging Face ``InferenceClient`` (the default), whose inference
        provider may be overridden per model.

    Args:
        model_id: Model identifier as listed in AVAILABLE_MODELS.
        provider: Inference-provider hint for InferenceClient; ignored for
            models routed to a dedicated endpoint, and overridden for
            models pinned to a specific Hugging Face provider below.

    Returns:
        An ``OpenAI``, ``Mistral``, or ``InferenceClient`` instance.
    """
    # Models served through OpenAI-compatible endpoints:
    # model_id -> (API-key environment variable, base URL).
    openai_compatible = {
        # Alibaba Cloud DashScope
        "qwen3-30b-a3b-instruct-2507": ("DASHSCOPE_API_KEY", "https://dashscope.aliyuncs.com/compatible-mode/v1"),
        "qwen3-30b-a3b-thinking-2507": ("DASHSCOPE_API_KEY", "https://dashscope.aliyuncs.com/compatible-mode/v1"),
        "qwen3-coder-30b-a3b-instruct": ("DASHSCOPE_API_KEY", "https://dashscope.aliyuncs.com/compatible-mode/v1"),
        # Poe
        "gpt-5": ("POE_API_KEY", "https://api.poe.com/v1"),
        "grok-4": ("POE_API_KEY", "https://api.poe.com/v1"),
        "claude-opus-4.1": ("POE_API_KEY", "https://api.poe.com/v1"),
        # StepFun
        "step-3": ("STEP_API_KEY", "https://api.stepfun.com/v1"),
        # Google Gemini (note the trailing slash in the base URL)
        "gemini-2.5-flash": ("GEMINI_API_KEY", "https://generativelanguage.googleapis.com/v1beta/openai/"),
        "gemini-2.5-pro": ("GEMINI_API_KEY", "https://generativelanguage.googleapis.com/v1beta/openai/"),
        # Moonshot AI
        "kimi-k2-turbo-preview": ("MOONSHOT_API_KEY", "https://api.moonshot.ai/v1"),
    }
    if model_id in openai_compatible:
        key_env, base_url = openai_compatible[model_id]
        return OpenAI(api_key=os.getenv(key_env), base_url=base_url)

    # Mistral-hosted models use the native Mistral client.
    if model_id in ("codestral-2508", "mistral-medium-2508"):
        return Mistral(api_key=os.getenv("MISTRAL_API_KEY"))

    # Models pinned to a specific Hugging Face inference provider;
    # everything else keeps the caller-supplied provider (default "auto").
    provider_overrides = {
        "openai/gpt-oss-120b": "groq",
        "openai/gpt-oss-20b": "groq",
        "moonshotai/Kimi-K2-Instruct": "groq",
        "Qwen/Qwen3-235B-A22B": "cerebras",
        "Qwen/Qwen3-235B-A22B-Instruct-2507": "cerebras",
        "Qwen/Qwen3-32B": "cerebras",
        "Qwen/Qwen3-235B-A22B-Thinking-2507": "cerebras",
        "Qwen/Qwen3-Coder-480B-A35B-Instruct": "cerebras",
        "deepseek-ai/DeepSeek-V3.1": "novita",
        "zai-org/GLM-4.5": "fireworks-ai",
    }
    provider = provider_overrides.get(model_id, provider)
    return InferenceClient(
        provider=provider,
        api_key=HF_TOKEN,
        bill_to="huggingface"
    )