import gradio as gr
import os
from huggingface_hub import InferenceClient

# --- UPDATED: Best Coding Models on Free Tier (2025/2026) ---
AVAILABLE_MODELS = [
    "Qwen/Qwen2.5-Coder-32B-Instruct",                     # SOTA Coding Model (Best overall)
    "Qwen/Qwen2.5-72B-Instruct",                           # Larger General Model (Great at Logic)
    "meta-llama/Llama-3.1-8B-Instruct",                    # Fast & Reliable
    "mistralai/Mistral-7B-Instruct-v0.2:featherless-ai",   # 7B Model (Very Smart)
    "zai-org/GLM-4.7:zai-org",                             # Extremely Fast
    "agentica-org/DeepCoder-14B-Preview:featherless-ai",   # Specialized Code Model
    "agentica-org/DeepCoder-14B-Preview",                  # Backup Model
]


def respond(message, history, system_message, temperature, model_id, request: gr.Request):
    # --- 1. Authentication Logic ---
    # Prefer a token attached to the request; fall back to the HF_TOKEN Space secret.
    token = None
    if request:
        token = getattr(request, "token", None)
    if token is None:
        token = os.getenv("HF_TOKEN")
    if token is None:
        yield "Error: No authentication token found. Please add 'HF_TOKEN' to Space Secrets."
        return

    # --- 2. Setup Client ---
    # The client is created dynamically based on the selected model_id
    client = InferenceClient(model_id, token=token)

    # --- 3. Build Messages ---
    messages = [{"role": "system", "content": system_message}]
    for msg in history:
        messages.append(msg)
    messages.append({"role": "user", "content": message})

    # --- 4. Generate Response ---
    try:
        stream = client.chat_completion(
            messages,
            max_tokens=2048,
            stream=True,
            temperature=temperature,
            top_p=0.9,
        )

        response_text = ""
        for chunk in stream:
            # FIX: Check if choices exist before accessing index [0]
            if not chunk.choices:
                continue
            content = chunk.choices[0].delta.content
            if content:
                response_text += content
                yield response_text

    except Exception as e:
        # Better Error Handling for Model Switching
        error_msg = str(e)
        if "404" in error_msg or "model_not_supported" in error_msg:
            yield f"Error: The model **{model_id}** is currently offline or overloaded.\n\nšŸ‘‰ Please select a different model from the dropdown above."
        else:
            yield f"Error: {error_msg}"


# --- 5. Build UI ---
with gr.Blocks(fill_height=True) as demo:
    with gr.Sidebar():
        gr.Markdown("# šŸ¤– Multi-Model Coding Assistant")
        gr.Markdown("Select a model from the **Additional Inputs** menu below to switch between different AI coding experts.")
        gr.LoginButton("Sign in")

    gr.ChatInterface(
        respond,
        type="messages",
        additional_inputs=[
            gr.Textbox(
                value="You are an expert software engineer. Provide clean, efficient, and well-commented code.",
                label="System Instruction",
                lines=2,
            ),
            gr.Slider(
                minimum=0.1,
                maximum=2.0,
                value=0.5,
                step=0.1,
                label="Temperature (Lower = More Precise)",
            ),
            # --- Model Selection Dropdown ---
            gr.Dropdown(
                choices=AVAILABLE_MODELS,
                value=AVAILABLE_MODELS[0],  # Default to Qwen 2.5 Coder
                label="Select AI Model",
                interactive=True,
            ),
        ],
    )

demo.launch()
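
# --- Optional: local smoke test (a minimal sketch, not part of the Space UI) ---
# Assumes HF_TOKEN is set in your environment and that you run this from a
# separate script (or with demo.launch() commented out above), since launch()
# blocks. Passing request=None makes respond() fall back to the HF_TOKEN
# environment variable.
#
#   last = ""
#   for partial in respond(
#       message="Write a Python function that reverses a string.",
#       history=[],
#       system_message="You are an expert software engineer.",
#       temperature=0.5,
#       model_id=AVAILABLE_MODELS[0],
#       request=None,
#   ):
#       last = partial
#   print(last)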