khairul91 commited on
Commit
7e8f5c6
·
verified ·
1 Parent(s): ddf45da

Delete anycoder_app/models.py

Browse files
Files changed (1) hide show
  1. anycoder_app/models.py +0 -358
anycoder_app/models.py DELETED
@@ -1,358 +0,0 @@
1
- """
2
- Model inference and client management for AnyCoder.
3
- Handles different model providers and inference clients.
4
- """
5
- import os
6
- from typing import Dict, List, Optional, Tuple
7
- import re
8
- from http import HTTPStatus
9
-
10
- from huggingface_hub import InferenceClient
11
- from openai import OpenAI
12
- from mistralai import Mistral
13
- import dashscope
14
- from google import genai
15
- from google.genai import types
16
-
17
- from .config import HF_TOKEN, AVAILABLE_MODELS
18
-
19
# Type definitions
# NOTE(review): these Dict-based aliases are redeclared later in this module
# with a Tuple-based History (History = List[Tuple[str, str]]), which is the
# shape the history helpers actually consume. Consider dropping one pair.
History = List[Dict[str, str]]
Messages = List[Dict[str, str]]
22
-
23
def get_inference_client(model_id, provider="auto"):
    """Return an inference client appropriate for *model_id*.

    Depending on the model this returns one of:
      * ``openai.OpenAI`` configured against an OpenAI-compatible gateway
        (Poe, DashScope, OpenRouter, StepFun, Google Gemini, Moonshot, or a
        stealth endpoint configured via environment variables),
      * ``mistralai.Mistral`` for Mistral-hosted models, or
      * ``huggingface_hub.InferenceClient`` as the fallback, with ``provider``
        pinned for specific models.

    Args:
        model_id: Identifier of the model to route.
        provider: HF inference provider hint; only used by the
            ``InferenceClient`` fallback and overridden for some models.

    Raises:
        ValueError: for ``stealth-model-1`` when its required environment
            variables are missing.
    """
    # (env var, base URL) pairs for OpenAI-compatible gateways.
    poe = ("POE_API_KEY", "https://api.poe.com/v1")
    dashscope_ep = ("DASHSCOPE_API_KEY", "https://dashscope.aliyuncs.com/compatible-mode/v1")
    openrouter = ("OPENROUTER_API_KEY", "https://openrouter.ai/api/v1")
    gemini = ("GEMINI_API_KEY", "https://generativelanguage.googleapis.com/v1beta/openai/")

    # Models served through an OpenAI-compatible endpoint.
    openai_endpoints = {
        "gemini-3.0-pro": poe,
        "gpt-5": poe,
        "gpt-5.1": poe,
        "gpt-5.1-instant": poe,
        "gpt-5.1-codex": poe,
        "gpt-5.1-codex-mini": poe,
        "grok-4": poe,
        "Grok-Code-Fast-1": poe,
        "claude-opus-4.1": poe,
        "claude-sonnet-4.5": poe,
        "claude-haiku-4.5": poe,
        "qwen3-30b-a3b-instruct-2507": dashscope_ep,
        "qwen3-30b-a3b-thinking-2507": dashscope_ep,
        "qwen3-coder-30b-a3b-instruct": dashscope_ep,
        "qwen3-max-preview": dashscope_ep,
        "openrouter/sonoma-dusk-alpha": openrouter,
        "openrouter/sonoma-sky-alpha": openrouter,
        "openrouter/sherlock-dash-alpha": openrouter,
        "openrouter/sherlock-think-alpha": openrouter,
        "step-3": ("STEP_API_KEY", "https://api.stepfun.com/v1"),
        "gemini-2.5-flash": gemini,
        "gemini-2.5-pro": gemini,
        "gemini-flash-latest": gemini,
        "gemini-flash-lite-latest": gemini,
        "kimi-k2-turbo-preview": ("MOONSHOT_API_KEY", "https://api.moonshot.ai/v1"),
    }
    if model_id in openai_endpoints:
        env_var, base_url = openai_endpoints[model_id]
        return OpenAI(api_key=os.getenv(env_var), base_url=base_url)

    if model_id in ("codestral-2508", "mistral-medium-2508"):
        # Mistral models use the native Mistral client.
        return Mistral(api_key=os.getenv("MISTRAL_API_KEY"))

    if model_id == "stealth-model-1":
        # Stealth model: both key and endpoint come from the environment.
        api_key = os.getenv("STEALTH_MODEL_1_API_KEY")
        if not api_key:
            raise ValueError("STEALTH_MODEL_1_API_KEY environment variable is required for Carrot model")
        base_url = os.getenv("STEALTH_MODEL_1_BASE_URL")
        if not base_url:
            raise ValueError("STEALTH_MODEL_1_BASE_URL environment variable is required for Carrot model")
        return OpenAI(api_key=api_key, base_url=base_url)

    # Fallback: HuggingFace InferenceClient. Some models pin a provider
    # regardless of the caller's selection; GLM-4.6 forces "auto" so HF
    # selects the best available provider.
    provider_overrides = {
        "MiniMaxAI/MiniMax-M2": "novita",
        "moonshotai/Kimi-K2-Thinking": "novita",
        "moonshotai/Kimi-K2-Instruct": "groq",
        "deepseek-ai/DeepSeek-V3.1": "novita",
        "deepseek-ai/DeepSeek-V3.1-Terminus": "novita",
        "deepseek-ai/DeepSeek-V3.2-Exp": "novita",
        "zai-org/GLM-4.5": "fireworks-ai",
        "zai-org/GLM-4.6": "auto",
    }
    provider = provider_overrides.get(model_id, provider)
    return InferenceClient(
        provider=provider,
        api_key=HF_TOKEN,
        bill_to="huggingface"
    )
216
-
217
# Helper for resolving stealth-model aliases and special model formats.
def get_real_model_id(model_id: str) -> str:
    """Resolve *model_id* to the identifier actually sent to the API.

    * ``stealth-model-1`` is mapped via the ``STEALTH_MODEL_1_ID``
      environment variable (raises ``ValueError`` when unset).
    * ``zai-org/GLM-4.6`` gets the ``:zai-org`` provider suffix the API
      requires in the model string.
    * Any other identifier is returned unchanged.
    """
    if model_id == "stealth-model-1":
        resolved = os.getenv("STEALTH_MODEL_1_ID")
        if not resolved:
            raise ValueError("STEALTH_MODEL_1_ID environment variable is required for Carrot model")
        return resolved
    if model_id == "zai-org/GLM-4.6":
        # GLM-4.6 requires the provider suffix in the model string.
        return "zai-org/GLM-4.6:zai-org"
    return model_id
231
-
232
# Type definitions (NOTE: these redeclare the Dict-based aliases from the top
# of the module with a Tuple-based History — the shape the helpers below use).
History = List[Tuple[str, str]]
Messages = List[Dict[str, str]]

def _coerce_text(content):
    """Collapse multimodal content (a list of part dicts) into plain text.

    Non-list content is returned as-is. If the list contains no text parts,
    fall back to ``str(content)`` so nothing is silently dropped.
    """
    if not isinstance(content, list):
        return content
    text = "".join(
        part.get("text", "")
        for part in content
        if isinstance(part, dict) and part.get("type") == "text"
    )
    return text if text else str(content)

def history_to_messages(history: History, system: str) -> Messages:
    """Convert (user, assistant) history tuples into chat messages,
    prefixed with a system message."""
    messages = [{'role': 'system', 'content': system}]
    for turn in history:
        # Multimodal user turns are flattened to their text parts.
        messages.append({'role': 'user', 'content': _coerce_text(turn[0])})
        messages.append({'role': 'assistant', 'content': turn[1]})
    return messages

def history_to_chatbot_messages(history: History) -> List[Dict[str, str]]:
    """Convert history tuples to chatbot message format."""
    messages = []
    for user_turn, assistant_turn in history:
        messages.append({"role": "user", "content": _coerce_text(user_turn)})
        messages.append({"role": "assistant", "content": assistant_turn})
    return messages
268
-
269
def strip_tool_call_markers(text):
    """Remove TOOL_CALL markers that some LLMs (like Qwen) add to their output.

    Strips ``[TOOL_CALL]`` / ``[/TOOL_CALL]`` (case-insensitive) and any line
    consisting solely of ``}}`` (tool-call residue), then trims whitespace.
    Falsy input is returned unchanged.
    """
    if not text:
        return text
    text = re.sub(r'\[/?TOOL_CALL\]', '', text, flags=re.IGNORECASE)
    # Only remove '}}' when it is alone on its line; inline '}}' is kept.
    text = re.sub(r'^\s*\}\}\s*$', '', text, flags=re.MULTILINE)
    return text.strip()

def remove_code_block(text):
    """Extract the code payload from a model response.

    Handles fenced code blocks (with or without a language marker), bare HTML
    documents, and provider prefaces injected before the HTML root. Falls back
    to returning the stripped text itself.
    """
    # First strip any tool call markers.
    text = strip_tool_call_markers(text)

    # Language markers that may appear as the first line inside a fence.
    # All entries are lowercase because the candidate line is lowercased
    # before comparison — the previous list contained 'sql-plSQL', which
    # could never match; fixed to 'sql-plsql'.
    lang_markers = {
        'python', 'html', 'css', 'javascript', 'json', 'c', 'cpp', 'markdown',
        'latex', 'jinja2', 'typescript', 'yaml', 'dockerfile', 'shell', 'r',
        'sql', 'sql-mssql', 'sql-mysql', 'sql-mariadb', 'sql-sqlite',
        'sql-cassandra', 'sql-plsql', 'sql-hive', 'sql-pgsql', 'sql-gql',
        'sql-gpsql', 'sql-sparksql', 'sql-esper',
    }

    # Try to match code blocks, most specific pattern first.
    patterns = [
        r'```(?:html|HTML)\n([\s\S]+?)\n```',  # ```html / ```HTML fences
        r'```\n([\s\S]+?)\n```',               # fences without a language marker
        r'```([\s\S]+?)```',                   # fences without line breaks
    ]
    for pattern in patterns:
        match = re.search(pattern, text, re.DOTALL)
        if not match:
            continue
        extracted = match.group(1).strip()
        # Drop a leading language-marker line (e.g. 'python') if present.
        if extracted.split('\n', 1)[0].strip().lower() in lang_markers:
            return extracted.split('\n', 1)[1] if '\n' in extracted else ''
        # If HTML markup starts later in the block (e.g. an injected
        # preface), trim to the first HTML root.
        html_root_idx = None
        for tag in ('<!DOCTYPE html', '<html'):
            idx = extracted.find(tag)
            if idx != -1:
                html_root_idx = idx if html_root_idx is None else min(html_root_idx, idx)
        if html_root_idx is not None and html_root_idx > 0:
            return extracted[html_root_idx:].strip()
        return extracted

    # No fenced block: check whether the whole text is HTML.
    stripped = text.strip()
    if stripped.startswith(('<!DOCTYPE html>', '<html', '<')):
        # If the HTML root appears later (e.g. a preface), trim to it.
        for tag in ('<!DOCTYPE html', '<html'):
            idx = stripped.find(tag)
            if idx > 0:
                return stripped[idx:].strip()
        return stripped
    # Special handling for an unterminated-style ```python fence.
    if text.strip().startswith('```python'):
        return text.strip()[9:-3].strip()
    # Fallback: drop a leading bare language-marker line.
    lines = text.strip().split('\n', 1)
    if lines[0].strip().lower() in lang_markers:
        return lines[1] if len(lines) > 1 else ''
    return text.strip()
323
-
324
- ## React CDN compatibility fixer removed per user preference
325
-
326
def strip_thinking_tags(text: str) -> str:
    """Strip <think> tags and [TOOL_CALL] markers from streaming output."""
    if not text:
        return text
    # Sequential passes, in the original order: removing a tag first may
    # expose a marker that the later pass then removes.
    for marker in (r'<think>', r'</think>', r'\[/?TOOL_CALL\]'):
        text = re.sub(marker, '', text, flags=re.IGNORECASE)
    return text
337
-
338
def strip_placeholder_thinking(text: str) -> str:
    """Remove placeholder 'Thinking...' status lines from streamed text."""
    if not text:
        return text
    # A placeholder line is exactly "Thinking..." with an optional
    # "(12s elapsed)" suffix, surrounded only by spaces/tabs; the trailing
    # newline is consumed so no blank line is left behind.
    placeholder_line = r"(?mi)^[\t ]*Thinking\.\.\.(?:\s*\(\d+s elapsed\))?[\t ]*$\n?"
    return re.sub(placeholder_line, "", text)
344
-
345
def is_placeholder_thinking_only(text: str) -> bool:
    """Return True if *text* consists solely of 'Thinking...' placeholder
    lines (each with an optional '(Ns elapsed)' suffix)."""
    if not text:
        return False
    candidate = text.strip()
    if not candidate:
        return False
    # One or more placeholder lines, nothing else.
    pattern = r"(?s)(?:\s*Thinking\.\.\.(?:\s*\(\d+s elapsed\))?\s*)+"
    return re.fullmatch(pattern, candidate) is not None
353
-
354
def extract_last_thinking_line(text: str) -> str:
    """Extract the last 'Thinking...' line from *text* for status display.

    Falls back to a bare "Thinking..." when no placeholder is present.
    """
    last = None
    for match in re.finditer(r"Thinking\.\.\.(?:\s*\(\d+s elapsed\))?", text):
        last = match.group(0)
    return last if last is not None else "Thinking..."
358
-