import gradio as gr
from backend.council_free import stage1_collect_responses, stage2_collect_rankings, stage3_synthesize_final_stream
from backend.config_free import COUNCIL_MODELS, CHAIRMAN_MODEL


async def ask_council(question: str, progress=gr.Progress()):
    """
    Ask the LLM Council a question.
    
    The council consists of multiple advanced LLMs (currently: {models}) that:
    1. Individually answer the question
    2. Rank each other's answers
    3. Synthesize a final best answer (Chairman: {chairman})
    
    Args:
        question: The user's question to be discussed by the council.
        progress: Gradio progress tracker.
        
    Yields:
        Status updates and finally the synthesized answer.
    """.format(
        models=", ".join([m["id"].split("/")[-1] for m in COUNCIL_MODELS]), 
        chairman=CHAIRMAN_MODEL["id"].split("/")[-1]
    )

    try:
        buffer = ""

        # Stage 1: Collect individual responses
        progress(0.1, desc="Stage 1: Collecting individual responses...")
        buffer += "## 🟑 Stage 1: Collecting individual responses from council members...\n\n"
        yield buffer

        stage1_results = await stage1_collect_responses(question)

        if not stage1_results:
            buffer += "\n❌ The council failed to generate a response."
            yield buffer
            return

        # Format Stage 1 results
        buffer += f"### βœ… Received {len(stage1_results)} responses:\n"
        for res in stage1_results:
            model_name = res["model"].split("/")[-1]
            preview = res["response"][:100].replace("\n", " ") + "..."
            buffer += f"- **{model_name}**: {preview}\n"
        buffer += "\n---\n\n"
        yield buffer

        # Stage 2: Collect rankings
        progress(0.4, desc="Stage 2: Council members are ranking responses...")
        buffer += "## 🟑 Stage 2: Council members are ranking each other's responses...\n\n"
        yield buffer

        stage2_results, _ = await stage2_collect_rankings(question, stage1_results)

        # Format Stage 2 results
        buffer += "### βœ… Rankings Collected:\n"
        for res in stage2_results:
            model_name = res["model"].split("/")[-1]
            # The raw ranking text is verbose, so just confirm that each member submitted rankings
            buffer += f"- **{model_name}** has submitted their rankings.\n"
        buffer += "\n---\n\n"
        yield buffer

        # Stage 3: Synthesize final answer
        progress(0.7, desc="Stage 3: Chairman is synthesizing the final answer...")
        buffer += "## 🟑 Stage 3: Chairman is synthesizing the final answer...\n\n"
        yield buffer

        full_response = ""
        async for chunk in stage3_synthesize_final_stream(question, stage1_results, stage2_results):
            full_response += chunk
            yield buffer + full_response

        progress(1.0, desc="Complete!")

        if not full_response:
            buffer += "\n❌ The council failed to generate a final synthesis."
            yield buffer
            return

        # Let's keep the history but mark Stage 3 as done
        final_buffer = buffer.replace(
            "## 🟑 Stage 3: Chairman is synthesizing the final answer...", "## 🟒 Stage 3: Final Answer"
        )
        yield final_buffer + full_response

    except Exception as e:
        yield f"❌ Error consulting the council: {str(e)}"


description = """
An LLM Council that consults multiple AI models to answer questions through a 3-stage deliberation process.

🎯 **Council Members**: Mix of FREE HuggingFace models + OpenAI models
- Meta Llama 3.3 70B
- Qwen 2.5 72B  
- Mixtral 8x7B
- OpenAI GPT-4o-mini
- OpenAI GPT-3.5-turbo

💡 **How it works**:
1. Each model answers your question independently
2. Models rank each other's responses anonymously
3. Chairman synthesizes the best final answer

⏱️ Takes ~1-2 minutes per question (3 stages)
💰 Uses mostly FREE models!
"""

demo = gr.Interface(
    fn=ask_council,
    inputs=gr.Textbox(lines=2, placeholder="Ask the council..."),
    outputs=gr.Markdown(height=200),
    title="LLM Council MCP Server",
    description=description,
)

if __name__ == "__main__":
    # Launch with mcp_server=True to expose as MCP
    demo.launch(mcp_server=True, show_error=True)