import gradio as gr
from backend.council_free import stage1_collect_responses, stage2_collect_rankings, stage3_synthesize_final_stream
from backend.config_free import COUNCIL_MODELS, CHAIRMAN_MODEL


async def ask_council(question: str, progress=gr.Progress()):
    """
    Ask the LLM Council a question.
    
    The council consists of multiple advanced LLMs (currently: {models}) that:
    1. Individually answer the question
    2. Rank each other's answers
    3. Synthesize a final best answer (Chairman: {chairman})
    
    Args:
        question: The user's question to be discussed by the council.
        progress: Gradio progress tracker.
        
    Yields:
        Status updates and finally the synthesized answer.
    """.format(
        models=", ".join([m["id"].split("/")[-1] for m in COUNCIL_MODELS]), 
        chairman=CHAIRMAN_MODEL["id"].split("/")[-1]
    )

    try:
        buffer = ""

        # Stage 1: Collect individual responses
        progress(0.1, desc="Stage 1: Collecting individual responses...")
        buffer += "## 🟑 Stage 1: Collecting individual responses from council members...\n\n"
        yield buffer

        stage1_results = await stage1_collect_responses(question)

        if not stage1_results:
            buffer += "\n❌ The council failed to generate a response."
            yield buffer
            return

        # Format Stage 1 results
        buffer += f"### βœ… Received {len(stage1_results)} responses:\n"
        for res in stage1_results:
            model_name = res["model"].split("/")[-1]
            preview = res["response"][:100].replace("\n", " ") + "..."
            buffer += f"- **{model_name}**: {preview}\n"
        buffer += "\n---\n\n"
        yield buffer

        # Stage 2: Collect rankings
        progress(0.4, desc="Stage 2: Council members are ranking responses...")
        buffer += "## 🟑 Stage 2: Council members are ranking each other's responses...\n\n"
        yield buffer

        stage2_results, _ = await stage2_collect_rankings(question, stage1_results)

        # Format Stage 2 results
        buffer += "### βœ… Rankings Collected:\n"
        for res in stage2_results:
            model_name = res["model"].split("/")[-1]
            # The raw ranking text is verbose, so just confirm that each member submitted rankings
            buffer += f"- **{model_name}** has submitted their rankings.\n"
        buffer += "\n---\n\n"
        yield buffer

        # Stage 3: Synthesize final answer
        progress(0.7, desc="Stage 3: Chairman is synthesizing the final answer...")
        buffer += "## 🟑 Stage 3: Chairman is synthesizing the final answer...\n\n"
        yield buffer

        full_response = ""
        async for chunk in stage3_synthesize_final_stream(question, stage1_results, stage2_results):
            full_response += chunk
            yield buffer + full_response

        progress(1.0, desc="Complete!")

        if not full_response:
            buffer += "\n❌ The council failed to generate a final synthesis."
            yield buffer
            return

        # Let's keep the history but mark Stage 3 as done
        final_buffer = buffer.replace(
            "## 🟑 Stage 3: Chairman is synthesizing the final answer...", "## 🟒 Stage 3: Final Answer"
        )
        yield final_buffer + full_response

    except Exception as e:
        yield f"❌ Error consulting the council: {str(e)}"


description = """
An LLM Council that consults multiple AI models to answer questions through a 3-stage deliberation process.

🎯 **Council Members**: Mix of FREE HuggingFace models + OpenAI models
- Meta Llama 3.3 70B
- Qwen 2.5 72B  
- Mixtral 8x7B
- OpenAI GPT-4o-mini
- OpenAI GPT-3.5-turbo

💡 **How it works**:
1. Each model answers your question independently
2. Models rank each other's responses anonymously
3. Chairman synthesizes the best final answer

⏱️ Takes ~1-2 minutes per question (3 stages)
💰 Uses mostly FREE models!
"""

demo = gr.Interface(
    fn=ask_council,
    inputs=gr.Textbox(lines=2, placeholder="Ask the council..."),
    outputs=gr.Markdown(height=200),
    title="LLM Council MCP Server",
    description=description,
)

if __name__ == "__main__":
    # Launch with mcp_server=True to expose as MCP
    demo.launch(mcp_server=True, show_error=True)