mgbam commited on
Commit
0730dbb
·
verified ·
1 Parent(s): 9006997

Create models.py

Browse files
Files changed (1) hide show
  1. models.py +273 -0
models.py ADDED
@@ -0,0 +1,273 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from huggingface_hub import InferenceClient
3
+ from openai import OpenAI
4
+ from mistralai import Mistral
5
+
6
# Registry of every model the app can route to.
# Each entry: "name" (human-readable label shown in the UI), "id" (the model
# identifier passed to the backing API), "description" (short UI blurb).
# NOTE(review): ids that look like "org/model" are served via Hugging Face
# InferenceClient; bare ids (e.g. "gpt-5", "step-3") are dispatched to
# vendor-specific OpenAI-compatible endpoints in get_inference_client.
AVAILABLE_MODELS = [
    {
        "name": "Moonshot Kimi-K2",
        "id": "moonshotai/Kimi-K2-Instruct",
        "description": "Moonshot AI Kimi-K2-Instruct model for code generation and general tasks"
    },
    {
        "name": "Kimi K2 Turbo (Preview)",
        "id": "kimi-k2-turbo-preview",
        "description": "Moonshot AI Kimi K2 Turbo via OpenAI-compatible API"
    },
    {
        "name": "DeepSeek V3",
        "id": "deepseek-ai/DeepSeek-V3-0324",
        "description": "DeepSeek V3 model for code generation"
    },
    {
        "name": "DeepSeek V3.1",
        "id": "deepseek-ai/DeepSeek-V3.1",
        "description": "DeepSeek V3.1 model for code generation and general tasks"
    },
    {
        "name": "DeepSeek R1",
        "id": "deepseek-ai/DeepSeek-R1-0528",
        "description": "DeepSeek R1 model for code generation"
    },
    {
        "name": "ERNIE-4.5-VL",
        "id": "baidu/ERNIE-4.5-VL-424B-A47B-Base-PT",
        "description": "ERNIE-4.5-VL model for multimodal code generation with image support"
    },
    {
        "name": "MiniMax M1",
        "id": "MiniMaxAI/MiniMax-M1-80k",
        "description": "MiniMax M1 model for code generation and general tasks"
    },
    {
        "name": "Qwen3-235B-A22B",
        "id": "Qwen/Qwen3-235B-A22B",
        "description": "Qwen3-235B-A22B model for code generation and general tasks"
    },
    {
        "name": "SmolLM3-3B",
        "id": "HuggingFaceTB/SmolLM3-3B",
        "description": "SmolLM3-3B model for code generation and general tasks"
    },
    {
        "name": "GLM-4.5",
        "id": "zai-org/GLM-4.5",
        "description": "GLM-4.5 model with thinking capabilities for advanced code generation"
    },
    {
        "name": "GLM-4.5V",
        "id": "zai-org/GLM-4.5V",
        "description": "GLM-4.5V multimodal model with image understanding for code generation"
    },
    {
        "name": "GLM-4.1V-9B-Thinking",
        "id": "THUDM/GLM-4.1V-9B-Thinking",
        "description": "GLM-4.1V-9B-Thinking model for multimodal code generation with image support"
    },
    {
        "name": "Qwen3-235B-A22B-Instruct-2507",
        "id": "Qwen/Qwen3-235B-A22B-Instruct-2507",
        "description": "Qwen3-235B-A22B-Instruct-2507 model for code generation and general tasks"
    },
    {
        "name": "Qwen3-Coder-480B-A35B-Instruct",
        "id": "Qwen/Qwen3-Coder-480B-A35B-Instruct",
        "description": "Qwen3-Coder-480B-A35B-Instruct model for advanced code generation and programming tasks"
    },
    {
        "name": "Qwen3-32B",
        "id": "Qwen/Qwen3-32B",
        "description": "Qwen3-32B model for code generation and general tasks"
    },
    {
        "name": "Qwen3-4B-Instruct-2507",
        "id": "Qwen/Qwen3-4B-Instruct-2507",
        "description": "Qwen3-4B-Instruct-2507 model for code generation and general tasks"
    },
    {
        "name": "Qwen3-4B-Thinking-2507",
        "id": "Qwen/Qwen3-4B-Thinking-2507",
        "description": "Qwen3-4B-Thinking-2507 model with advanced reasoning capabilities for code generation and general tasks"
    },
    {
        "name": "Qwen3-235B-A22B-Thinking",
        "id": "Qwen/Qwen3-235B-A22B-Thinking-2507",
        "description": "Qwen3-235B-A22B-Thinking model with advanced reasoning capabilities"
    },
    # The next three bare Qwen ids are served by Alibaba Cloud DashScope,
    # not by Hugging Face (see get_inference_client).
    {
        "name": "Qwen3-30B-A3B-Instruct-2507",
        "id": "qwen3-30b-a3b-instruct-2507",
        "description": "Qwen3-30B-A3B-Instruct model via Alibaba Cloud DashScope API"
    },
    {
        "name": "Qwen3-30B-A3B-Thinking-2507",
        "id": "qwen3-30b-a3b-thinking-2507",
        "description": "Qwen3-30B-A3B-Thinking model with advanced reasoning via Alibaba Cloud DashScope API"
    },
    {
        "name": "Qwen3-Coder-30B-A3B-Instruct",
        "id": "qwen3-coder-30b-a3b-instruct",
        "description": "Qwen3-Coder-30B-A3B-Instruct model for advanced code generation via Alibaba Cloud DashScope API"
    },
    {
        "name": "Cohere Command-A Reasoning 08-2025",
        "id": "CohereLabs/command-a-reasoning-08-2025",
        "description": "Cohere Labs Command-A Reasoning (Aug 2025) via Hugging Face InferenceClient"
    },
    {
        "name": "StepFun Step-3",
        "id": "step-3",
        "description": "StepFun Step-3 model - AI chat assistant by 阶跃星辰 with multilingual capabilities"
    },
    {
        "name": "Codestral 2508",
        "id": "codestral-2508",
        "description": "Mistral Codestral model - specialized for code generation and programming tasks"
    },
    {
        "name": "Mistral Medium 2508",
        "id": "mistral-medium-2508",
        "description": "Mistral Medium 2508 model via Mistral API for general tasks and coding"
    },
    {
        "name": "Gemini 2.5 Flash",
        "id": "gemini-2.5-flash",
        "description": "Google Gemini 2.5 Flash via OpenAI-compatible API"
    },
    {
        "name": "Gemini 2.5 Pro",
        "id": "gemini-2.5-pro",
        "description": "Google Gemini 2.5 Pro via OpenAI-compatible API"
    },
    {
        "name": "GPT-OSS-120B",
        "id": "openai/gpt-oss-120b",
        "description": "OpenAI GPT-OSS-120B model for advanced code generation and general tasks"
    },
    {
        "name": "GPT-OSS-20B",
        "id": "openai/gpt-oss-20b",
        "description": "OpenAI GPT-OSS-20B model for code generation and general tasks"
    },
    {
        "name": "GPT-5",
        "id": "gpt-5",
        "description": "OpenAI GPT-5 model for advanced code generation and general tasks"
    },
    {
        "name": "Grok-4",
        "id": "grok-4",
        "description": "Grok-4 model via Poe (OpenAI-compatible) for advanced tasks"
    },
    {
        "name": "Claude-Opus-4.1",
        "id": "claude-opus-4.1",
        "description": "Anthropic Claude Opus 4.1 via Poe (OpenAI-compatible)"
    }
]
168
+
169
# Default model selection: pick the entry whose display name matches
# DEFAULT_MODEL_NAME, falling back to the first registered model (or None
# when the registry is empty).  Using next() instead of a module-level for
# loop avoids leaking a loop variable (_m) into the module namespace.
DEFAULT_MODEL_NAME = "Qwen3-Coder-480B-A35B-Instruct"
DEFAULT_MODEL = next(
    (model for model in AVAILABLE_MODELS if model.get("name") == DEFAULT_MODEL_NAME),
    AVAILABLE_MODELS[0] if AVAILABLE_MODELS else None,
)
178
+
179
# HF Inference Client
# Token used for every Hugging Face InferenceClient call below.
# NOTE(review): this raises at import time when the variable is unset, which
# makes the whole module unimportable without credentials — deliberate
# fail-fast behavior; confirm before relaxing.
HF_TOKEN = os.getenv('HF_TOKEN')
if not HF_TOKEN:
    raise RuntimeError("HF_TOKEN environment variable is not set. Please set it to your Hugging Face API token.")
183
+
184
# Vendor-hosted, OpenAI-compatible endpoints keyed by model id.
# Value is (environment variable holding the API key, base URL).
# Consolidates what was a dozen byte-identical if/elif branches.
_OPENAI_COMPAT_ENDPOINTS = {
    # Alibaba Cloud DashScope (all three bare Qwen ids share one endpoint)
    "qwen3-30b-a3b-instruct-2507": ("DASHSCOPE_API_KEY", "https://dashscope.aliyuncs.com/compatible-mode/v1"),
    "qwen3-30b-a3b-thinking-2507": ("DASHSCOPE_API_KEY", "https://dashscope.aliyuncs.com/compatible-mode/v1"),
    "qwen3-coder-30b-a3b-instruct": ("DASHSCOPE_API_KEY", "https://dashscope.aliyuncs.com/compatible-mode/v1"),
    # Poe (OpenAI-compatible) for GPT-5, Grok-4 and Claude Opus 4.1
    "gpt-5": ("POE_API_KEY", "https://api.poe.com/v1"),
    "grok-4": ("POE_API_KEY", "https://api.poe.com/v1"),
    "claude-opus-4.1": ("POE_API_KEY", "https://api.poe.com/v1"),
    # StepFun
    "step-3": ("STEP_API_KEY", "https://api.stepfun.com/v1"),
    # Google Gemini (OpenAI-compatible surface)
    "gemini-2.5-flash": ("GEMINI_API_KEY", "https://generativelanguage.googleapis.com/v1beta/openai/"),
    "gemini-2.5-pro": ("GEMINI_API_KEY", "https://generativelanguage.googleapis.com/v1beta/openai/"),
    # Moonshot AI (Kimi K2 Turbo preview)
    "kimi-k2-turbo-preview": ("MOONSHOT_API_KEY", "https://api.moonshot.ai/v1"),
}

# Model ids served through the native Mistral SDK rather than OpenAI/HF.
_MISTRAL_MODELS = {"codestral-2508", "mistral-medium-2508"}

# Hugging Face InferenceClient provider overrides; any id not listed here
# keeps the caller-supplied provider (default "auto").
_HF_PROVIDER_OVERRIDES = {
    "openai/gpt-oss-120b": "groq",
    "openai/gpt-oss-20b": "groq",
    "moonshotai/Kimi-K2-Instruct": "groq",
    "Qwen/Qwen3-235B-A22B": "cerebras",
    "Qwen/Qwen3-235B-A22B-Instruct-2507": "cerebras",
    "Qwen/Qwen3-32B": "cerebras",
    "Qwen/Qwen3-235B-A22B-Thinking-2507": "cerebras",
    "Qwen/Qwen3-Coder-480B-A35B-Instruct": "cerebras",
    "deepseek-ai/DeepSeek-V3.1": "novita",
    "zai-org/GLM-4.5": "fireworks-ai",
}


def get_inference_client(model_id, provider="auto"):
    """Return the API client appropriate for *model_id*.

    Dispatch order:
      1. Models hosted behind a vendor OpenAI-compatible endpoint
         (DashScope, Poe, StepFun, Gemini, Moonshot) -> ``openai.OpenAI``.
      2. Mistral-native models -> ``mistralai.Mistral``.
      3. Everything else -> Hugging Face ``InferenceClient``, with the
         provider forced for known ids and otherwise taken from the
         *provider* argument (default ``"auto"``).

    Args:
        model_id: Model identifier as listed in ``AVAILABLE_MODELS``.
        provider: Hugging Face inference provider; only used for the
            InferenceClient path and overridden for known model ids.

    Returns:
        An ``OpenAI``, ``Mistral``, or ``InferenceClient`` instance.
    """
    endpoint = _OPENAI_COMPAT_ENDPOINTS.get(model_id)
    if endpoint is not None:
        key_env_var, base_url = endpoint
        # Vendor keys are read lazily here (unlike HF_TOKEN, which is
        # required at import time), so a missing key surfaces on first call.
        return OpenAI(api_key=os.getenv(key_env_var), base_url=base_url)

    if model_id in _MISTRAL_MODELS:
        return Mistral(api_key=os.getenv("MISTRAL_API_KEY"))

    provider = _HF_PROVIDER_OVERRIDES.get(model_id, provider)
    return InferenceClient(
        provider=provider,
        api_key=HF_TOKEN,
        bill_to="huggingface"
    )