import gradio as gr
import os
from huggingface_hub import InferenceClient

# --- UPDATED: Best Coding Models on Free Tier (2025/2026) ---
AVAILABLE_MODELS = [
    "Qwen/Qwen2.5-Coder-32B-Instruct",                     # SOTA Coding Model (Best overall)
    "Qwen/Qwen2.5-72B-Instruct",                           # Larger General Model (Great at Logic)
    "meta-llama/Llama-3.1-8B-Instruct",                    # Fast & Reliable
    "mistralai/Mistral-7B-Instruct-v0.2:featherless-ai",   # 7B Model (Very Smart)
    "zai-org/GLM-4.7:zai-org",                             # Extremely Fast
    "agentica-org/DeepCoder-14B-Preview:featherless-ai",   # Specialized Code Model
    "agentica-org/DeepCoder-14B-Preview",                  # Backup Model
]


def respond(message, history, system_message, temperature, model_id, request: gr.Request):
    # --- 1. Authentication Logic ---
    # Prefer a token attached to the request; fall back to the HF_TOKEN Space secret.
    token = None
    if request:
        token = getattr(request, "token", None)
    if token is None:
        token = os.getenv("HF_TOKEN")
    if token is None:
        yield "Error: No authentication token found. Please add 'HF_TOKEN' to Space Secrets."
        return

    # --- 2. Setup Client ---
    # The client is created dynamically based on the selected model_id
    client = InferenceClient(model_id, token=token)

    # --- 3. Build Messages ---
    messages = [{"role": "system", "content": system_message}]
    for msg in history:
        messages.append(msg)
    messages.append({"role": "user", "content": message})

    # --- 4. Generate Response ---
    try:
        stream = client.chat_completion(
            messages,
            max_tokens=2048,
            stream=True,
            temperature=temperature,
            top_p=0.9,
        )

        response_text = ""
        for chunk in stream:
            # FIX: Check if choices exist before accessing index [0]
            if not chunk.choices:
                continue
            content = chunk.choices[0].delta.content
            if content:
                response_text += content
                yield response_text

    except Exception as e:
        # Better Error Handling for Model Switching
        error_msg = str(e)
        if "404" in error_msg or "model_not_supported" in error_msg:
            yield f"Error: The model **{model_id}** is currently offline or overloaded.\n\nšŸ‘‰ Please select a different model from the dropdown above."
        else:
            yield f"Error: {error_msg}"


# --- 5. Build UI ---
with gr.Blocks(fill_height=True) as demo:
    with gr.Sidebar():
        gr.Markdown("# šŸ¤– Multi-Model Coding Assistant")
        gr.Markdown("Select a model from the **Additional Inputs** menu below to switch between different AI coding experts.")
        gr.LoginButton("Sign in")

    gr.ChatInterface(
        respond,
        type="messages",
        additional_inputs=[
            gr.Textbox(
                value="You are an expert software engineer. Provide clean, efficient, and well-commented code.",
                label="System Instruction",
                lines=2,
            ),
            gr.Slider(
                minimum=0.1,
                maximum=2.0,
                value=0.5,
                step=0.1,
                label="Temperature (Lower = More Precise)",
            ),
            # --- Model Selection Dropdown ---
            gr.Dropdown(
                choices=AVAILABLE_MODELS,
                value=AVAILABLE_MODELS[0],  # Default to Qwen 2.5 Coder
                label="Select AI Model",
                interactive=True,
            ),
        ],
    )

demo.launch()
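
# --- Optional: local smoke test (a minimal sketch, not part of the Space UI) ---
# Assumes HF_TOKEN is set in your environment and that you run this from a
# separate script (or with demo.launch() commented out above), since launch()
# blocks. Passing request=None makes respond() fall back to the HF_TOKEN
# environment variable.
#
#   last = ""
#   for partial in respond(
#       message="Write a Python function that reverses a string.",
#       history=[],
#       system_message="You are an expert software engineer.",
#       temperature=0.5,
#       model_id=AVAILABLE_MODELS[0],
#       request=None,
#   ):
#       last = partial
#   print(last)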