mgbam commited on
Commit
0730dbb
·
verified ·
1 Parent(s): 9006997

Create models.py

Browse files
Files changed (1) hide show
  1. models.py +273 -0
models.py ADDED
@@ -0,0 +1,273 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from huggingface_hub import InferenceClient
3
+ from openai import OpenAI
4
+ from mistralai import Mistral
5
+
6
# Registry of every model the app can route to.
# Each entry: "name" (human-readable label shown in the UI), "id" (the model
# identifier passed to the backing API), "description" (short UI blurb).
# NOTE(review): ids that look like "org/model" are served via Hugging Face
# InferenceClient; bare ids (e.g. "gpt-5", "step-3") are dispatched to
# vendor-specific OpenAI-compatible endpoints in get_inference_client.
AVAILABLE_MODELS = [
    {
        "name": "Moonshot Kimi-K2",
        "id": "moonshotai/Kimi-K2-Instruct",
        "description": "Moonshot AI Kimi-K2-Instruct model for code generation and general tasks"
    },
    {
        "name": "Kimi K2 Turbo (Preview)",
        "id": "kimi-k2-turbo-preview",
        "description": "Moonshot AI Kimi K2 Turbo via OpenAI-compatible API"
    },
    {
        "name": "DeepSeek V3",
        "id": "deepseek-ai/DeepSeek-V3-0324",
        "description": "DeepSeek V3 model for code generation"
    },
    {
        "name": "DeepSeek V3.1",
        "id": "deepseek-ai/DeepSeek-V3.1",
        "description": "DeepSeek V3.1 model for code generation and general tasks"
    },
    {
        "name": "DeepSeek R1",
        "id": "deepseek-ai/DeepSeek-R1-0528",
        "description": "DeepSeek R1 model for code generation"
    },
    {
        "name": "ERNIE-4.5-VL",
        "id": "baidu/ERNIE-4.5-VL-424B-A47B-Base-PT",
        "description": "ERNIE-4.5-VL model for multimodal code generation with image support"
    },
    {
        "name": "MiniMax M1",
        "id": "MiniMaxAI/MiniMax-M1-80k",
        "description": "MiniMax M1 model for code generation and general tasks"
    },
    {
        "name": "Qwen3-235B-A22B",
        "id": "Qwen/Qwen3-235B-A22B",
        "description": "Qwen3-235B-A22B model for code generation and general tasks"
    },
    {
        "name": "SmolLM3-3B",
        "id": "HuggingFaceTB/SmolLM3-3B",
        "description": "SmolLM3-3B model for code generation and general tasks"
    },
    {
        "name": "GLM-4.5",
        "id": "zai-org/GLM-4.5",
        "description": "GLM-4.5 model with thinking capabilities for advanced code generation"
    },
    {
        "name": "GLM-4.5V",
        "id": "zai-org/GLM-4.5V",
        "description": "GLM-4.5V multimodal model with image understanding for code generation"
    },
    {
        "name": "GLM-4.1V-9B-Thinking",
        "id": "THUDM/GLM-4.1V-9B-Thinking",
        "description": "GLM-4.1V-9B-Thinking model for multimodal code generation with image support"
    },
    {
        "name": "Qwen3-235B-A22B-Instruct-2507",
        "id": "Qwen/Qwen3-235B-A22B-Instruct-2507",
        "description": "Qwen3-235B-A22B-Instruct-2507 model for code generation and general tasks"
    },
    {
        "name": "Qwen3-Coder-480B-A35B-Instruct",
        "id": "Qwen/Qwen3-Coder-480B-A35B-Instruct",
        "description": "Qwen3-Coder-480B-A35B-Instruct model for advanced code generation and programming tasks"
    },
    {
        "name": "Qwen3-32B",
        "id": "Qwen/Qwen3-32B",
        "description": "Qwen3-32B model for code generation and general tasks"
    },
    {
        "name": "Qwen3-4B-Instruct-2507",
        "id": "Qwen/Qwen3-4B-Instruct-2507",
        "description": "Qwen3-4B-Instruct-2507 model for code generation and general tasks"
    },
    {
        "name": "Qwen3-4B-Thinking-2507",
        "id": "Qwen/Qwen3-4B-Thinking-2507",
        "description": "Qwen3-4B-Thinking-2507 model with advanced reasoning capabilities for code generation and general tasks"
    },
    {
        "name": "Qwen3-235B-A22B-Thinking",
        "id": "Qwen/Qwen3-235B-A22B-Thinking-2507",
        "description": "Qwen3-235B-A22B-Thinking model with advanced reasoning capabilities"
    },
    # The next three bare Qwen ids are served by Alibaba Cloud DashScope,
    # not by Hugging Face (see get_inference_client).
    {
        "name": "Qwen3-30B-A3B-Instruct-2507",
        "id": "qwen3-30b-a3b-instruct-2507",
        "description": "Qwen3-30B-A3B-Instruct model via Alibaba Cloud DashScope API"
    },
    {
        "name": "Qwen3-30B-A3B-Thinking-2507",
        "id": "qwen3-30b-a3b-thinking-2507",
        "description": "Qwen3-30B-A3B-Thinking model with advanced reasoning via Alibaba Cloud DashScope API"
    },
    {
        "name": "Qwen3-Coder-30B-A3B-Instruct",
        "id": "qwen3-coder-30b-a3b-instruct",
        "description": "Qwen3-Coder-30B-A3B-Instruct model for advanced code generation via Alibaba Cloud DashScope API"
    },
    {
        "name": "Cohere Command-A Reasoning 08-2025",
        "id": "CohereLabs/command-a-reasoning-08-2025",
        "description": "Cohere Labs Command-A Reasoning (Aug 2025) via Hugging Face InferenceClient"
    },
    {
        "name": "StepFun Step-3",
        "id": "step-3",
        "description": "StepFun Step-3 model - AI chat assistant by 阶跃星辰 with multilingual capabilities"
    },
    {
        "name": "Codestral 2508",
        "id": "codestral-2508",
        "description": "Mistral Codestral model - specialized for code generation and programming tasks"
    },
    {
        "name": "Mistral Medium 2508",
        "id": "mistral-medium-2508",
        "description": "Mistral Medium 2508 model via Mistral API for general tasks and coding"
    },
    {
        "name": "Gemini 2.5 Flash",
        "id": "gemini-2.5-flash",
        "description": "Google Gemini 2.5 Flash via OpenAI-compatible API"
    },
    {
        "name": "Gemini 2.5 Pro",
        "id": "gemini-2.5-pro",
        "description": "Google Gemini 2.5 Pro via OpenAI-compatible API"
    },
    {
        "name": "GPT-OSS-120B",
        "id": "openai/gpt-oss-120b",
        "description": "OpenAI GPT-OSS-120B model for advanced code generation and general tasks"
    },
    {
        "name": "GPT-OSS-20B",
        "id": "openai/gpt-oss-20b",
        "description": "OpenAI GPT-OSS-20B model for code generation and general tasks"
    },
    {
        "name": "GPT-5",
        "id": "gpt-5",
        "description": "OpenAI GPT-5 model for advanced code generation and general tasks"
    },
    {
        "name": "Grok-4",
        "id": "grok-4",
        "description": "Grok-4 model via Poe (OpenAI-compatible) for advanced tasks"
    },
    {
        "name": "Claude-Opus-4.1",
        "id": "claude-opus-4.1",
        "description": "Anthropic Claude Opus 4.1 via Poe (OpenAI-compatible)"
    }
]
168
+
169
# Default model selection: pick the entry whose display name matches
# DEFAULT_MODEL_NAME, falling back to the first registered model (or None
# when the registry is empty).  Using next() instead of a module-level for
# loop avoids leaking a loop variable (_m) into the module namespace.
DEFAULT_MODEL_NAME = "Qwen3-Coder-480B-A35B-Instruct"
DEFAULT_MODEL = next(
    (model for model in AVAILABLE_MODELS if model.get("name") == DEFAULT_MODEL_NAME),
    AVAILABLE_MODELS[0] if AVAILABLE_MODELS else None,
)
178
+
179
# HF Inference Client
# Token used for every Hugging Face InferenceClient call below.
# NOTE(review): this raises at import time when the variable is unset, which
# makes the whole module unimportable without credentials — deliberate
# fail-fast behavior; confirm before relaxing.
HF_TOKEN = os.getenv('HF_TOKEN')
if not HF_TOKEN:
    raise RuntimeError("HF_TOKEN environment variable is not set. Please set it to your Hugging Face API token.")
183
+
184
# Vendor-hosted, OpenAI-compatible endpoints keyed by model id.
# Value is (environment variable holding the API key, base URL).
# Consolidates what was a dozen byte-identical if/elif branches.
_OPENAI_COMPAT_ENDPOINTS = {
    # Alibaba Cloud DashScope (all three bare Qwen ids share one endpoint)
    "qwen3-30b-a3b-instruct-2507": ("DASHSCOPE_API_KEY", "https://dashscope.aliyuncs.com/compatible-mode/v1"),
    "qwen3-30b-a3b-thinking-2507": ("DASHSCOPE_API_KEY", "https://dashscope.aliyuncs.com/compatible-mode/v1"),
    "qwen3-coder-30b-a3b-instruct": ("DASHSCOPE_API_KEY", "https://dashscope.aliyuncs.com/compatible-mode/v1"),
    # Poe (OpenAI-compatible) for GPT-5, Grok-4 and Claude Opus 4.1
    "gpt-5": ("POE_API_KEY", "https://api.poe.com/v1"),
    "grok-4": ("POE_API_KEY", "https://api.poe.com/v1"),
    "claude-opus-4.1": ("POE_API_KEY", "https://api.poe.com/v1"),
    # StepFun
    "step-3": ("STEP_API_KEY", "https://api.stepfun.com/v1"),
    # Google Gemini (OpenAI-compatible surface)
    "gemini-2.5-flash": ("GEMINI_API_KEY", "https://generativelanguage.googleapis.com/v1beta/openai/"),
    "gemini-2.5-pro": ("GEMINI_API_KEY", "https://generativelanguage.googleapis.com/v1beta/openai/"),
    # Moonshot AI (Kimi K2 Turbo preview)
    "kimi-k2-turbo-preview": ("MOONSHOT_API_KEY", "https://api.moonshot.ai/v1"),
}

# Model ids served through the native Mistral SDK rather than OpenAI/HF.
_MISTRAL_MODELS = {"codestral-2508", "mistral-medium-2508"}

# Hugging Face InferenceClient provider overrides; any id not listed here
# keeps the caller-supplied provider (default "auto").
_HF_PROVIDER_OVERRIDES = {
    "openai/gpt-oss-120b": "groq",
    "openai/gpt-oss-20b": "groq",
    "moonshotai/Kimi-K2-Instruct": "groq",
    "Qwen/Qwen3-235B-A22B": "cerebras",
    "Qwen/Qwen3-235B-A22B-Instruct-2507": "cerebras",
    "Qwen/Qwen3-32B": "cerebras",
    "Qwen/Qwen3-235B-A22B-Thinking-2507": "cerebras",
    "Qwen/Qwen3-Coder-480B-A35B-Instruct": "cerebras",
    "deepseek-ai/DeepSeek-V3.1": "novita",
    "zai-org/GLM-4.5": "fireworks-ai",
}


def get_inference_client(model_id, provider="auto"):
    """Return the API client appropriate for *model_id*.

    Dispatch order:
      1. Models hosted behind a vendor OpenAI-compatible endpoint
         (DashScope, Poe, StepFun, Gemini, Moonshot) -> ``openai.OpenAI``.
      2. Mistral-native models -> ``mistralai.Mistral``.
      3. Everything else -> Hugging Face ``InferenceClient``, with the
         provider forced for known ids and otherwise taken from the
         *provider* argument (default ``"auto"``).

    Args:
        model_id: Model identifier as listed in ``AVAILABLE_MODELS``.
        provider: Hugging Face inference provider; only used for the
            InferenceClient path and overridden for known model ids.

    Returns:
        An ``OpenAI``, ``Mistral``, or ``InferenceClient`` instance.
    """
    endpoint = _OPENAI_COMPAT_ENDPOINTS.get(model_id)
    if endpoint is not None:
        key_env_var, base_url = endpoint
        # Vendor keys are read lazily here (unlike HF_TOKEN, which is
        # required at import time), so a missing key surfaces on first call.
        return OpenAI(api_key=os.getenv(key_env_var), base_url=base_url)

    if model_id in _MISTRAL_MODELS:
        return Mistral(api_key=os.getenv("MISTRAL_API_KEY"))

    provider = _HF_PROVIDER_OVERRIDES.get(model_id, provider)
    return InferenceClient(
        provider=provider,
        api_key=HF_TOKEN,
        bill_to="huggingface"
    )