Krishna Chaitanya Cheedella
commited on
Commit
·
aa61236
1
Parent(s):
4197191
Refactor to use FREE HuggingFace models + OpenAI instead of OpenRouter
Browse files- .env.example +12 -0
- CODE_ANALYSIS.md +281 -0
- DEPLOYMENT_GUIDE.md +343 -0
- IMPROVEMENTS_SUMMARY.md +216 -0
- QUICKSTART.md +149 -0
- README.md +145 -1
- app.py +21 -7
- backend/api_client.py +355 -0
- backend/config_free.py +86 -0
- backend/config_improved.py +78 -0
- backend/council_free.py +386 -0
- backend/openrouter_improved.py +192 -0
- requirements.txt +11 -0
.env.example
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Environment Variables Template
|
| 2 |
+
# Copy this file to .env and fill in your values
|
| 3 |
+
# DO NOT commit .env to version control!
|
| 4 |
+
|
| 5 |
+
# OpenAI API Key (Required for OpenAI models)
|
| 6 |
+
# Get your key from: https://platform.openai.com/api-keys
|
| 7 |
+
OPENAI_API_KEY=your_openai_api_key_here
|
| 8 |
+
|
| 9 |
+
# HuggingFace API Key (Required for HF Inference API - FREE models)
|
| 10 |
+
# Get your key from: https://huggingface.co/settings/tokens
|
| 11 |
+
HUGGINGFACE_API_KEY=your_huggingface_token_here
|
| 12 |
+
|
CODE_ANALYSIS.md
ADDED
|
@@ -0,0 +1,281 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Code Analysis & Refactoring Summary
|
| 2 |
+
|
| 3 |
+
## 📊 Code Quality Analysis
|
| 4 |
+
|
| 5 |
+
### ✅ Strengths
|
| 6 |
+
|
| 7 |
+
1. **Clean Architecture**
|
| 8 |
+
- Well-separated concerns (council logic, API client, storage)
|
| 9 |
+
- Clear 3-stage pipeline design
|
| 10 |
+
- Async/await properly implemented
|
| 11 |
+
|
| 12 |
+
2. **Good Gradio Integration**
|
| 13 |
+
- Progressive UI updates with streaming
|
| 14 |
+
- MCP server capability enabled
|
| 15 |
+
- User-friendly progress indicators
|
| 16 |
+
|
| 17 |
+
3. **Solid Core Logic**
|
| 18 |
+
- Parallel model querying for efficiency
|
| 19 |
+
- Anonymous ranking system to reduce bias
|
| 20 |
+
- Structured synthesis approach
|
| 21 |
+
|
| 22 |
+
### ⚠️ Issues Found
|
| 23 |
+
|
| 24 |
+
1. **Outdated/Unstable Models**
|
| 25 |
+
- Using experimental endpoints (`:hyperbolic`, `:novita`)
|
| 26 |
+
- Models may have limited availability
|
| 27 |
+
- Inconsistent provider backends
|
| 28 |
+
|
| 29 |
+
2. **Missing Error Handling**
|
| 30 |
+
- No retry logic for failed API calls
|
| 31 |
+
- Timeouts not configurable
|
| 32 |
+
- Silent failures in parallel queries
|
| 33 |
+
|
| 34 |
+
3. **Limited Configuration**
|
| 35 |
+
- Hardcoded timeouts
|
| 36 |
+
- No alternative model configs
|
| 37 |
+
- Missing environment validation
|
| 38 |
+
|
| 39 |
+
4. **No Dependencies File**
|
| 40 |
+
- Missing `requirements.txt`
|
| 41 |
+
- Unclear Python version requirements
|
| 42 |
+
|
| 43 |
+
5. **Incomplete Documentation**
|
| 44 |
+
- No deployment guide
|
| 45 |
+
- Missing local setup instructions
|
| 46 |
+
- No troubleshooting section
|
| 47 |
+
|
| 48 |
+
## 🔄 Refactoring Completed
|
| 49 |
+
|
| 50 |
+
### 1. Created `requirements.txt`
|
| 51 |
+
```txt
|
| 52 |
+
gradio>=6.0.0
|
| 53 |
+
httpx>=0.27.0
|
| 54 |
+
python-dotenv>=1.0.0
|
| 55 |
+
fastapi>=0.115.0
|
| 56 |
+
uvicorn>=0.30.0
|
| 57 |
+
pydantic>=2.0.0
|
| 58 |
+
```
|
| 59 |
+
|
| 60 |
+
### 2. Improved Configuration (`config_improved.py`)
|
| 61 |
+
|
| 62 |
+
**Better Model Selection:**
|
| 63 |
+
```python
|
| 64 |
+
# Balanced quality & cost
|
| 65 |
+
COUNCIL_MODELS = [
|
| 66 |
+
"deepseek/deepseek-chat", # DeepSeek V3
|
| 67 |
+
"anthropic/claude-3.7-sonnet", # Claude 3.7
|
| 68 |
+
"openai/gpt-4o", # GPT-4o
|
| 69 |
+
"google/gemini-2.0-flash-thinking-exp:free",
|
| 70 |
+
"qwen/qwq-32b-preview",
|
| 71 |
+
]
|
| 72 |
+
CHAIRMAN_MODEL = "deepseek/deepseek-reasoner"
|
| 73 |
+
```
|
| 74 |
+
|
| 75 |
+
**Why These Models:**
|
| 76 |
+
- **DeepSeek Chat**: Latest V3, excellent reasoning, cost-effective (~$0.15/M tokens)
|
| 77 |
+
- **Claude 3.7 Sonnet**: Strong analytical skills, good at synthesis
|
| 78 |
+
- **GPT-4o**: Reliable, well-rounded, OpenAI's latest multimodal
|
| 79 |
+
- **Gemini 2.0 Flash Thinking**: Fast, free tier available, reasoning capabilities
|
| 80 |
+
- **QwQ 32B**: Strong reasoning model, good value
|
| 81 |
+
|
| 82 |
+
**Alternative Configurations:**
|
| 83 |
+
- Budget Council (fast & cheap)
|
| 84 |
+
- Premium Council (maximum quality)
|
| 85 |
+
- Reasoning Council (complex problems)
|
| 86 |
+
|
| 87 |
+
### 3. Enhanced API Client (`openrouter_improved.py`)
|
| 88 |
+
|
| 89 |
+
**Added Features:**
|
| 90 |
+
- ✅ Retry logic with exponential backoff
|
| 91 |
+
- ✅ Configurable timeouts
|
| 92 |
+
- ✅ Better error categorization (4xx vs 5xx)
|
| 93 |
+
- ✅ Status reporting for parallel queries
|
| 94 |
+
- ✅ Proper HTTP headers (Referer, Title)
|
| 95 |
+
- ✅ Graceful stream error handling
|
| 96 |
+
|
| 97 |
+
**Error Handling Example:**
|
| 98 |
+
```python
|
| 99 |
+
for attempt in range(max_retries + 1):
|
| 100 |
+
try:
|
| 101 |
+
# API call
|
| 102 |
+
except httpx.TimeoutException:
|
| 103 |
+
# Retry with exponential backoff
|
| 104 |
+
except httpx.HTTPStatusError:
|
| 105 |
+
# Don't retry 4xx, retry 5xx
|
| 106 |
+
except Exception:
|
| 107 |
+
# Retry generic errors
|
| 108 |
+
```
|
| 109 |
+
|
| 110 |
+
### 4. Comprehensive Documentation
|
| 111 |
+
|
| 112 |
+
Created `DEPLOYMENT_GUIDE.md` with:
|
| 113 |
+
- Architecture diagrams
|
| 114 |
+
- Model recommendations & comparisons
|
| 115 |
+
- Step-by-step HF Spaces deployment
|
| 116 |
+
- Local setup instructions
|
| 117 |
+
- Performance characteristics
|
| 118 |
+
- Cost estimates
|
| 119 |
+
- Troubleshooting guide
|
| 120 |
+
- Best practices
|
| 121 |
+
|
| 122 |
+
### 5. Environment Template
|
| 123 |
+
|
| 124 |
+
Created `.env.example` for easy setup
|
| 125 |
+
|
| 126 |
+
## 📈 Improvements Summary
|
| 127 |
+
|
| 128 |
+
| Aspect | Before | After | Impact |
|
| 129 |
+
|--------|--------|-------|--------|
|
| 130 |
+
| **Error Handling** | None | Retry + backoff | 🟢 Better reliability |
|
| 131 |
+
| **Model Selection** | Experimental endpoints | Stable latest models | 🟢 Better quality |
|
| 132 |
+
| **Configuration** | Hardcoded | Multiple presets | 🟢 More flexible |
|
| 133 |
+
| **Documentation** | Basic README | Full deployment guide | 🟢 Easier to use |
|
| 134 |
+
| **Dependencies** | Missing | Complete requirements.txt | 🟢 Clear setup |
|
| 135 |
+
| **Logging** | Minimal | Detailed status updates | 🟢 Better debugging |
|
| 136 |
+
|
| 137 |
+
## 🎯 Recommended Next Steps
|
| 138 |
+
|
| 139 |
+
### Immediate Actions
|
| 140 |
+
|
| 141 |
+
1. **Update to Improved Files**
|
| 142 |
+
```bash
|
| 143 |
+
# Backup originals
|
| 144 |
+
cp backend/config.py backend/config_original.py
|
| 145 |
+
cp backend/openrouter.py backend/openrouter_original.py
|
| 146 |
+
|
| 147 |
+
# Use improved versions
|
| 148 |
+
mv backend/config_improved.py backend/config.py
|
| 149 |
+
mv backend/openrouter_improved.py backend/openrouter.py
|
| 150 |
+
```
|
| 151 |
+
|
| 152 |
+
2. **Test Locally**
|
| 153 |
+
```bash
|
| 154 |
+
pip install -r requirements.txt
|
| 155 |
+
cp .env.example .env
|
| 156 |
+
# Edit .env with your API key
|
| 157 |
+
python app.py
|
| 158 |
+
```
|
| 159 |
+
|
| 160 |
+
3. **Deploy to HF Spaces**
|
| 161 |
+
- Follow DEPLOYMENT_GUIDE.md
|
| 162 |
+
- Add OPENROUTER_API_KEY to secrets
|
| 163 |
+
- Monitor first few queries
|
| 164 |
+
|
| 165 |
+
### Future Enhancements
|
| 166 |
+
|
| 167 |
+
1. **Caching System**
|
| 168 |
+
- Cache responses for identical questions
|
| 169 |
+
- Reduce API costs for repeated queries
|
| 170 |
+
- Implement TTL-based expiration
|
| 171 |
+
|
| 172 |
+
2. **UI Improvements**
|
| 173 |
+
- Show model costs in real-time
|
| 174 |
+
- Allow custom model selection
|
| 175 |
+
- Add export functionality
|
| 176 |
+
|
| 177 |
+
3. **Advanced Features**
|
| 178 |
+
- Multi-turn conversations with context
|
| 179 |
+
- Custom voting weights
|
| 180 |
+
- A/B testing different councils
|
| 181 |
+
- Cost tracking dashboard
|
| 182 |
+
|
| 183 |
+
4. **Performance Optimization**
|
| 184 |
+
- Parallel stage execution where possible
|
| 185 |
+
- Response streaming in Stage 1
|
| 186 |
+
- Lazy loading of rankings
|
| 187 |
+
|
| 188 |
+
5. **Monitoring & Analytics**
|
| 189 |
+
- Track response quality metrics
|
| 190 |
+
- Log aggregate rankings over time
|
| 191 |
+
- Identify best-performing models
|
| 192 |
+
|
| 193 |
+
## 💰 Cost Analysis
|
| 194 |
+
|
| 195 |
+
### Per Query Estimates
|
| 196 |
+
|
| 197 |
+
**Budget Council** (~$0.01-0.03/query)
|
| 198 |
+
- 4 models × $0.002 (avg) = $0.008
|
| 199 |
+
- Chairman × $0.002 = $0.002
|
| 200 |
+
- Total: ~$0.01
|
| 201 |
+
|
| 202 |
+
**Balanced Council** (~$0.05-0.15/query)
|
| 203 |
+
- 5 models × $0.01 (avg) = $0.05
|
| 204 |
+
- Chairman × $0.02 = $0.02
|
| 205 |
+
- Total: ~$0.07
|
| 206 |
+
|
| 207 |
+
**Premium Council** (~$0.20-0.50/query)
|
| 208 |
+
- 5 premium models × $0.05 (avg) = $0.25
|
| 209 |
+
- Chairman (o1) × $0.10 = $0.10
|
| 210 |
+
- Total: ~$0.35
|
| 211 |
+
|
| 212 |
+
*Note: Costs vary by prompt length and complexity*
|
| 213 |
+
|
| 214 |
+
### Monthly Budget Examples
|
| 215 |
+
|
| 216 |
+
- **Light use** (10 queries/day): ~$20-50/month (Balanced)
|
| 217 |
+
- **Medium use** (50 queries/day): ~$100-250/month (Balanced)
|
| 218 |
+
- **Heavy use** (200 queries/day): ~$400-1000/month (Balanced)
|
| 219 |
+
|
| 220 |
+
## 🧪 Testing Recommendations
|
| 221 |
+
|
| 222 |
+
### Test Cases
|
| 223 |
+
|
| 224 |
+
1. **Simple Question**
|
| 225 |
+
- "What is the capital of France?"
|
| 226 |
+
- Expected: All models agree, quick synthesis
|
| 227 |
+
|
| 228 |
+
2. **Complex Analysis**
|
| 229 |
+
- "Compare the economic impacts of renewable vs fossil fuel energy"
|
| 230 |
+
- Expected: Diverse perspectives, thoughtful synthesis
|
| 231 |
+
|
| 232 |
+
3. **Technical Question**
|
| 233 |
+
- "Explain quantum entanglement in simple terms"
|
| 234 |
+
- Expected: Varied explanations, best synthesis chosen
|
| 235 |
+
|
| 236 |
+
4. **Math Problem**
|
| 237 |
+
- "If a train travels 120km in 1.5 hours, what is its average speed?"
|
| 238 |
+
- Expected: Consistent answers, verification of logic
|
| 239 |
+
|
| 240 |
+
5. **Controversial Topic**
|
| 241 |
+
- "What are the pros and cons of nuclear energy?"
|
| 242 |
+
- Expected: Balanced viewpoints, nuanced synthesis
|
| 243 |
+
|
| 244 |
+
### Monitoring
|
| 245 |
+
|
| 246 |
+
Watch for:
|
| 247 |
+
- Response times > 2 minutes
|
| 248 |
+
- Multiple model failures
|
| 249 |
+
- Inconsistent rankings
|
| 250 |
+
- Poor synthesis quality
|
| 251 |
+
- API rate limits
|
| 252 |
+
|
| 253 |
+
## 🔍 Code Review Checklist
|
| 254 |
+
|
| 255 |
+
- [x] Error handling implemented
|
| 256 |
+
- [x] Retry logic added
|
| 257 |
+
- [x] Timeouts configurable
|
| 258 |
+
- [x] Models updated to stable versions
|
| 259 |
+
- [x] Documentation complete
|
| 260 |
+
- [x] Dependencies specified
|
| 261 |
+
- [x] Environment template created
|
| 262 |
+
- [x] Local testing instructions
|
| 263 |
+
- [x] Deployment guide written
|
| 264 |
+
- [ ] Unit tests (future)
|
| 265 |
+
- [ ] Integration tests (future)
|
| 266 |
+
- [ ] CI/CD pipeline (future)
|
| 267 |
+
|
| 268 |
+
## 📝 Notes
|
| 269 |
+
|
| 270 |
+
The improved codebase maintains backward compatibility while adding:
|
| 271 |
+
- Better reliability through retries
|
| 272 |
+
- More flexible configuration
|
| 273 |
+
- Clearer documentation
|
| 274 |
+
- Production-ready error handling
|
| 275 |
+
|
| 276 |
+
All improvements are in separate files (`*_improved.py`) so you can:
|
| 277 |
+
1. Test new versions alongside old
|
| 278 |
+
2. Gradually migrate
|
| 279 |
+
3. Roll back if needed
|
| 280 |
+
|
| 281 |
+
The original design is solid - these improvements make it production-ready!
|
DEPLOYMENT_GUIDE.md
ADDED
|
@@ -0,0 +1,343 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# LLM Council - Comprehensive Guide
|
| 2 |
+
|
| 3 |
+
## 📋 Overview
|
| 4 |
+
|
| 5 |
+
The LLM Council is a sophisticated multi-agent system that uses multiple Large Language Models (LLMs) to collectively answer questions through a 3-stage deliberation process:
|
| 6 |
+
|
| 7 |
+
1. **Stage 1 - Individual Responses**: Each council member independently answers the question
|
| 8 |
+
2. **Stage 2 - Peer Review**: Council members rank each other's anonymized responses
|
| 9 |
+
3. **Stage 3 - Synthesis**: A chairman model synthesizes the final answer based on all inputs
|
| 10 |
+
|
| 11 |
+
## 🏗️ Architecture
|
| 12 |
+
|
| 13 |
+
### Current Implementation
|
| 14 |
+
|
| 15 |
+
```
|
| 16 |
+
┌─────────────────────────────────────────────────────────────┐
|
| 17 |
+
│ User Question │
|
| 18 |
+
└────────────────────────┬────────────────────────────────────┘
|
| 19 |
+
│
|
| 20 |
+
▼
|
| 21 |
+
┌─────────────────────────────────────────────────────────────┐
|
| 22 |
+
│ Stage 1: Parallel Responses from 3-5 Council Models │
|
| 23 |
+
│ • Model 1: Individual answer │
|
| 24 |
+
│ • Model 2: Individual answer │
|
| 25 |
+
│ • Model 3: Individual answer │
|
| 26 |
+
│ • (etc...) │
|
| 27 |
+
└────────────────────────┬────────────────────────────────────┘
|
| 28 |
+
│
|
| 29 |
+
▼
|
| 30 |
+
┌─────────────────────────────────────────────────────────────┐
|
| 31 |
+
│ Stage 2: Peer Rankings (Anonymized) │
|
| 32 |
+
│ • Each model ranks all responses (Response A, B, C...) │
|
| 33 |
+
│ • Aggregate rankings calculated │
|
| 34 |
+
└────────────────────────┬────────────────────────────────────┘
|
| 35 |
+
│
|
| 36 |
+
▼
|
| 37 |
+
┌─────────────────────────────────────────────────────────────┐
|
| 38 |
+
│ Stage 3: Chairman Synthesis │
|
| 39 |
+
│ • Reviews all responses + rankings │
|
| 40 |
+
│ • Generates final comprehensive answer │
|
| 41 |
+
└─────────────────────────────────────────────────────────────┘
|
| 42 |
+
```
|
| 43 |
+
|
| 44 |
+
## 🔧 Current Models (Original)
|
| 45 |
+
|
| 46 |
+
### Council Members
|
| 47 |
+
- `openai/gpt-oss-120b:hyperbolic` - Open source model via Hyperbolic
|
| 48 |
+
- `deepseek-ai/DeepSeek-V3.2-Exp:novita` - DeepSeek experimental via Novita
|
| 49 |
+
- `Qwen/Qwen3-235B-A22B-Instruct-2507:hyperbolic` - Qwen large model
|
| 50 |
+
|
| 51 |
+
### Chairman
|
| 52 |
+
- `deepseek-ai/DeepSeek-V3.2-Exp:novita`
|
| 53 |
+
|
| 54 |
+
**Issues with Current Setup:**
|
| 55 |
+
- Using experimental/beta endpoints which may be unstable
|
| 56 |
+
- Limited diversity in model providers
|
| 57 |
+
- Some models may not be optimally configured
|
| 58 |
+
|
| 59 |
+
## ✨ IMPROVED Model Recommendations
|
| 60 |
+
|
| 61 |
+
### Recommended Council (Balanced Quality & Cost)
|
| 62 |
+
|
| 63 |
+
```python
|
| 64 |
+
COUNCIL_MODELS = [
|
| 65 |
+
"deepseek/deepseek-chat", # DeepSeek V3 - excellent reasoning
|
| 66 |
+
"anthropic/claude-3.7-sonnet", # Claude 3.7 - strong analysis
|
| 67 |
+
"openai/gpt-4o", # GPT-4o - reliable & versatile
|
| 68 |
+
"google/gemini-2.0-flash-thinking-exp:free", # Fast thinking
|
| 69 |
+
"qwen/qwq-32b-preview", # Strong reasoning
|
| 70 |
+
]
|
| 71 |
+
|
| 72 |
+
CHAIRMAN_MODEL = "deepseek/deepseek-reasoner" # DeepSeek R1 for synthesis
|
| 73 |
+
```
|
| 74 |
+
|
| 75 |
+
### Alternative Configurations
|
| 76 |
+
|
| 77 |
+
#### Budget Council (Fast & Cost-Effective)
|
| 78 |
+
```python
|
| 79 |
+
COUNCIL_MODELS = [
|
| 80 |
+
"deepseek/deepseek-chat",
|
| 81 |
+
"google/gemini-2.0-flash-exp:free",
|
| 82 |
+
"qwen/qwen-2.5-72b-instruct",
|
| 83 |
+
"meta-llama/llama-3.3-70b-instruct",
|
| 84 |
+
]
|
| 85 |
+
CHAIRMAN_MODEL = "deepseek/deepseek-chat"
|
| 86 |
+
```
|
| 87 |
+
|
| 88 |
+
#### Premium Council (Maximum Quality)
|
| 89 |
+
```python
|
| 90 |
+
COUNCIL_MODELS = [
|
| 91 |
+
"anthropic/claude-3.7-sonnet",
|
| 92 |
+
"openai/o1",
|
| 93 |
+
"google/gemini-exp-1206",
|
| 94 |
+
"anthropic/claude-3-opus",
|
| 95 |
+
"x-ai/grok-2-1212",
|
| 96 |
+
]
|
| 97 |
+
CHAIRMAN_MODEL = "openai/o1" # or "anthropic/claude-3.7-sonnet"
|
| 98 |
+
```
|
| 99 |
+
|
| 100 |
+
#### Reasoning Council (Complex Problems)
|
| 101 |
+
```python
|
| 102 |
+
COUNCIL_MODELS = [
|
| 103 |
+
"openai/o1-mini",
|
| 104 |
+
"deepseek/deepseek-reasoner",
|
| 105 |
+
"google/gemini-2.0-flash-thinking-exp:free",
|
| 106 |
+
"qwen/qwq-32b-preview",
|
| 107 |
+
]
|
| 108 |
+
CHAIRMAN_MODEL = "deepseek/deepseek-reasoner"
|
| 109 |
+
```
|
| 110 |
+
|
| 111 |
+
## 🚀 Running on Hugging Face Spaces
|
| 112 |
+
|
| 113 |
+
### Prerequisites
|
| 114 |
+
|
| 115 |
+
1. **OpenRouter API Key**: Sign up at [openrouter.ai](https://openrouter.ai/) and get your API key
|
| 116 |
+
|
| 117 |
+
2. **Hugging Face Account**: Create account at [huggingface.co](https://huggingface.co/)
|
| 118 |
+
|
| 119 |
+
### Step-by-Step Deployment
|
| 120 |
+
|
| 121 |
+
#### Method 1: Using Existing Space (Fork)
|
| 122 |
+
|
| 123 |
+
1. **Fork the Space**
|
| 124 |
+
- Visit: https://huggingface.co/spaces/burtenshaw/karpathy-llm-council
|
| 125 |
+
- Click "⋮" → "Duplicate this Space"
|
| 126 |
+
- Choose a name for your space
|
| 127 |
+
|
| 128 |
+
2. **Configure Secrets**
|
| 129 |
+
- Go to your space → Settings → Repository secrets
|
| 130 |
+
- Add secret: `OPENROUTER_API_KEY` with your OpenRouter API key
|
| 131 |
+
|
| 132 |
+
3. **Update Models (Optional)**
|
| 133 |
+
- Edit `backend/config.py` to use recommended models
|
| 134 |
+
- Commit changes
|
| 135 |
+
|
| 136 |
+
4. **Space Auto-Restarts**
|
| 137 |
+
- HF Spaces will automatically rebuild and deploy
|
| 138 |
+
|
| 139 |
+
#### Method 2: Create New Space from Scratch
|
| 140 |
+
|
| 141 |
+
1. **Create New Space**
|
| 142 |
+
```
|
| 143 |
+
- Go to huggingface.co/new-space
|
| 144 |
+
- Choose "Gradio" as SDK
|
| 145 |
+
- Select SDK version: 6.0.0
|
| 146 |
+
- Choose hardware: CPU (free) or GPU (paid)
|
| 147 |
+
```
|
| 148 |
+
|
| 149 |
+
2. **Upload Files**
|
| 150 |
+
```bash
|
| 151 |
+
# Clone your local repo
|
| 152 |
+
git clone https://huggingface.co/spaces/YOUR_USERNAME/YOUR_SPACE_NAME
|
| 153 |
+
cd YOUR_SPACE_NAME
|
| 154 |
+
|
| 155 |
+
# Copy your files
|
| 156 |
+
cp -r /path/to/llm_council/* .
|
| 157 |
+
|
| 158 |
+
# Add and commit
|
| 159 |
+
git add .
|
| 160 |
+
git commit -m "Initial commit"
|
| 161 |
+
git push
|
| 162 |
+
```
|
| 163 |
+
|
| 164 |
+
3. **Configure Space**
|
| 165 |
+
- Create `README.md` with metadata:
|
| 166 |
+
```markdown
|
| 167 |
+
---
|
| 168 |
+
title: LLM Council
|
| 169 |
+
emoji: 🏢
|
| 170 |
+
colorFrom: pink
|
| 171 |
+
colorTo: green
|
| 172 |
+
sdk: gradio
|
| 173 |
+
sdk_version: 6.0.0
|
| 174 |
+
app_file: app.py
|
| 175 |
+
pinned: false
|
| 176 |
+
---
|
| 177 |
+
```
|
| 178 |
+
|
| 179 |
+
4. **Add Secret**
|
| 180 |
+
- Settings → Repository secrets → Add `OPENROUTER_API_KEY`
|
| 181 |
+
|
| 182 |
+
### Required Files Structure
|
| 183 |
+
|
| 184 |
+
```
|
| 185 |
+
your-space/
|
| 186 |
+
├── README.md # Space configuration
|
| 187 |
+
├── requirements.txt # Python dependencies
|
| 188 |
+
├── app.py # Main Gradio app
|
| 189 |
+
├── .env.example # Environment template
|
| 190 |
+
└── backend/
|
| 191 |
+
├── __init__.py
|
| 192 |
+
├── config.py # Model configuration
|
| 193 |
+
├── council.py # 3-stage logic
|
| 194 |
+
├── openrouter.py # API client
|
| 195 |
+
├── storage.py # Data storage
|
| 196 |
+
└── main.py # FastAPI (optional)
|
| 197 |
+
```
|
| 198 |
+
|
| 199 |
+
## 🔐 Environment Variables
|
| 200 |
+
|
| 201 |
+
Create `.env` file locally (DO NOT commit to git):
|
| 202 |
+
|
| 203 |
+
```env
|
| 204 |
+
OPENROUTER_API_KEY=your_openrouter_api_key_here
|
| 205 |
+
```
|
| 206 |
+
|
| 207 |
+
For Hugging Face Spaces, use Repository Secrets instead of `.env` file.
|
| 208 |
+
|
| 209 |
+
## 📦 Dependencies
|
| 210 |
+
|
| 211 |
+
```txt
|
| 212 |
+
gradio>=6.0.0
|
| 213 |
+
httpx>=0.27.0
|
| 214 |
+
python-dotenv>=1.0.0
|
| 215 |
+
fastapi>=0.115.0 # Optional - for REST API
|
| 216 |
+
uvicorn>=0.30.0 # Optional - for REST API
|
| 217 |
+
pydantic>=2.0.0 # Optional - for REST API
|
| 218 |
+
```
|
| 219 |
+
|
| 220 |
+
## 💻 Running Locally
|
| 221 |
+
|
| 222 |
+
```bash
|
| 223 |
+
# 1. Clone repository
|
| 224 |
+
git clone https://huggingface.co/spaces/burtenshaw/karpathy-llm-council
|
| 225 |
+
cd karpathy-llm-council
|
| 226 |
+
|
| 227 |
+
# 2. Create virtual environment
|
| 228 |
+
python -m venv venv
|
| 229 |
+
source venv/bin/activate # On Windows: venv\Scripts\activate
|
| 230 |
+
|
| 231 |
+
# 3. Install dependencies
|
| 232 |
+
pip install -r requirements.txt
|
| 233 |
+
|
| 234 |
+
# 4. Create .env file
|
| 235 |
+
echo "OPENROUTER_API_KEY=your_key_here" > .env
|
| 236 |
+
|
| 237 |
+
# 5. Run the app
|
| 238 |
+
python app.py
|
| 239 |
+
```
|
| 240 |
+
|
| 241 |
+
The app will be available at `http://localhost:7860`
|
| 242 |
+
|
| 243 |
+
## 🔧 Code Improvements Made
|
| 244 |
+
|
| 245 |
+
### 1. Enhanced Error Handling
|
| 246 |
+
- Retry logic with exponential backoff
|
| 247 |
+
- Graceful handling of model failures
|
| 248 |
+
- Better timeout management
|
| 249 |
+
- Detailed error logging
|
| 250 |
+
|
| 251 |
+
### 2. Better Model Configuration
|
| 252 |
+
- Updated to latest stable models
|
| 253 |
+
- Multiple configuration presets
|
| 254 |
+
- Configurable timeouts and retries
|
| 255 |
+
- Clear documentation of alternatives
|
| 256 |
+
|
| 257 |
+
### 3. Improved API Client
|
| 258 |
+
- Proper HTTP headers (Referer, Title)
|
| 259 |
+
- Robust streaming support
|
| 260 |
+
- Better exception handling
|
| 261 |
+
- Status reporting during parallel queries
|
| 262 |
+
|
| 263 |
+
### 4. Documentation
|
| 264 |
+
- Comprehensive deployment guide
|
| 265 |
+
- Architecture diagrams
|
| 266 |
+
- Configuration examples
|
| 267 |
+
- Troubleshooting tips
|
| 268 |
+
|
| 269 |
+
## 📊 Performance Characteristics
|
| 270 |
+
|
| 271 |
+
### Typical Response Times (Balanced Config)
|
| 272 |
+
- **Stage 1**: 10-30 seconds (parallel execution)
|
| 273 |
+
- **Stage 2**: 15-45 seconds (parallel ranking)
|
| 274 |
+
- **Stage 3**: 20-60 seconds (synthesis)
|
| 275 |
+
- **Total**: ~45-135 seconds per question
|
| 276 |
+
|
| 277 |
+
### Cost per Query (Approximate)
|
| 278 |
+
- Budget Council: $0.01 - $0.03
|
| 279 |
+
- Balanced Council: $0.05 - $0.15
|
| 280 |
+
- Premium Council: $0.20 - $0.50
|
| 281 |
+
|
| 282 |
+
*Costs vary based on prompt length and response complexity*
|
| 283 |
+
|
| 284 |
+
## 🐛 Troubleshooting
|
| 285 |
+
|
| 286 |
+
### Common Issues
|
| 287 |
+
|
| 288 |
+
1. **"All models failed to respond"**
|
| 289 |
+
- Check API key is valid
|
| 290 |
+
- Verify OpenRouter credit balance
|
| 291 |
+
- Check model availability on OpenRouter
|
| 292 |
+
|
| 293 |
+
2. **Timeout errors**
|
| 294 |
+
- Increase timeout in config
|
| 295 |
+
- Use faster models
|
| 296 |
+
- Check network connectivity
|
| 297 |
+
|
| 298 |
+
3. **Space won't start**
|
| 299 |
+
- Verify `requirements.txt` is correct
|
| 300 |
+
- Check logs in Space → Logs tab
|
| 301 |
+
- Ensure Python version compatibility
|
| 302 |
+
|
| 303 |
+
4. **Slow responses**
|
| 304 |
+
- Consider Budget Council configuration
|
| 305 |
+
- Reduce number of council members
|
| 306 |
+
- Use faster models
|
| 307 |
+
|
| 308 |
+
## 🎯 Best Practices
|
| 309 |
+
|
| 310 |
+
1. **Model Selection**
|
| 311 |
+
- Use 3-5 council members (sweet spot)
|
| 312 |
+
- Choose diverse models from different providers
|
| 313 |
+
- Match chairman to task complexity
|
| 314 |
+
|
| 315 |
+
2. **Cost Management**
|
| 316 |
+
- Start with Budget Council for testing
|
| 317 |
+
- Monitor usage on OpenRouter dashboard
|
| 318 |
+
- Set spending limits
|
| 319 |
+
|
| 320 |
+
3. **Quality Optimization**
|
| 321 |
+
- Use Premium Council for important queries
|
| 322 |
+
- Reasoning Council for math/logic problems
|
| 323 |
+
- Adjust timeouts based on model speed
|
| 324 |
+
|
| 325 |
+
## 📚 Additional Resources
|
| 326 |
+
|
| 327 |
+
- [Original LLM Council by Machine Theory](https://github.com/machine-theory/lm-council)
|
| 328 |
+
- [OpenRouter Documentation](https://openrouter.ai/docs)
|
| 329 |
+
- [Gradio Documentation](https://gradio.app/docs)
|
| 330 |
+
- [Hugging Face Spaces Guide](https://huggingface.co/docs/hub/spaces)
|
| 331 |
+
|
| 332 |
+
## 🤝 Contributing
|
| 333 |
+
|
| 334 |
+
Suggestions for improvement:
|
| 335 |
+
1. Add caching for repeated questions
|
| 336 |
+
2. Implement conversation history
|
| 337 |
+
3. Add custom model configurations via UI
|
| 338 |
+
4. Support for different voting mechanisms
|
| 339 |
+
5. Add cost tracking and estimates
|
| 340 |
+
|
| 341 |
+
## 📝 License
|
| 342 |
+
|
| 343 |
+
Check the original repository for license information.
|
IMPROVEMENTS_SUMMARY.md
ADDED
|
@@ -0,0 +1,216 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# 📋 SUMMARY - LLM Council Code Review & Improvements
|
| 2 |
+
|
| 3 |
+
## ✅ What Was Done
|
| 4 |
+
|
| 5 |
+
### 1. **Complete Code Analysis** ✓
|
| 6 |
+
- Analyzed the 3-stage council architecture
|
| 7 |
+
- Identified strengths and weaknesses
|
| 8 |
+
- Reviewed all backend modules
|
| 9 |
+
|
| 10 |
+
### 2. **Created Missing Files** ✓
|
| 11 |
+
- `requirements.txt` - All Python dependencies
|
| 12 |
+
- `.env.example` - Environment variable template
|
| 13 |
+
- `DEPLOYMENT_GUIDE.md` - Comprehensive deployment instructions
|
| 14 |
+
- `CODE_ANALYSIS.md` - Detailed code review
|
| 15 |
+
- `QUICKSTART.md` - Fast setup guide
|
| 16 |
+
|
| 17 |
+
### 3. **Improved Code Files** ✓
|
| 18 |
+
- `backend/config_improved.py` - Better model selection
|
| 19 |
+
- `backend/openrouter_improved.py` - Enhanced error handling & retries
|
| 20 |
+
|
| 21 |
+
## 🎯 Key Improvements
|
| 22 |
+
|
| 23 |
+
### Model Recommendations
|
| 24 |
+
|
| 25 |
+
#### Current (Original) ❌
|
| 26 |
+
```python
|
| 27 |
+
# Using experimental/unstable endpoints
|
| 28 |
+
"openai/gpt-oss-120b:hyperbolic"
|
| 29 |
+
"deepseek-ai/DeepSeek-V3.2-Exp:novita"
|
| 30 |
+
"Qwen/Qwen3-235B-A22B-Instruct-2507:hyperbolic"
|
| 31 |
+
```
|
| 32 |
+
|
| 33 |
+
#### Recommended (Improved) ✅
|
| 34 |
+
```python
|
| 35 |
+
# Stable, latest models from trusted providers
|
| 36 |
+
COUNCIL_MODELS = [
|
| 37 |
+
"deepseek/deepseek-chat", # DeepSeek V3 - excellent reasoning
|
| 38 |
+
"anthropic/claude-3.7-sonnet", # Claude 3.7 - strong analysis
|
| 39 |
+
"openai/gpt-4o", # GPT-4o - reliable & versatile
|
| 40 |
+
"google/gemini-2.0-flash-thinking-exp:free", # Fast thinking
|
| 41 |
+
"qwen/qwq-32b-preview", # Strong reasoning
|
| 42 |
+
]
|
| 43 |
+
|
| 44 |
+
CHAIRMAN_MODEL = "deepseek/deepseek-reasoner" # DeepSeek R1
|
| 45 |
+
```
|
| 46 |
+
|
| 47 |
+
**Why These Models?**
|
| 48 |
+
- ✅ Latest stable versions
|
| 49 |
+
- ✅ Diverse providers (OpenAI, Anthropic, Google, DeepSeek, Qwen)
|
| 50 |
+
- ✅ Proven performance
|
| 51 |
+
- ✅ Good cost/quality balance
|
| 52 |
+
- ✅ Readily available on OpenRouter
|
| 53 |
+
|
| 54 |
+
### Code Enhancements
|
| 55 |
+
|
| 56 |
+
#### Error Handling & Reliability
|
| 57 |
+
```python
|
| 58 |
+
# ✅ Retry logic with exponential backoff
|
| 59 |
+
# ✅ Timeout configuration
|
| 60 |
+
# ✅ Proper error categorization (4xx vs 5xx)
|
| 61 |
+
# ✅ Graceful degradation
|
| 62 |
+
# ✅ Detailed logging
|
| 63 |
+
```
|
| 64 |
+
|
| 65 |
+
#### Configuration Options
|
| 66 |
+
```python
|
| 67 |
+
# ✅ Budget Council (fast & cheap)
|
| 68 |
+
# ✅ Balanced Council (recommended)
|
| 69 |
+
# ✅ Premium Council (maximum quality)
|
| 70 |
+
# ✅ Reasoning Council (complex problems)
|
| 71 |
+
```
|
| 72 |
+
|
| 73 |
+
## 📁 Files Created
|
| 74 |
+
|
| 75 |
+
```
|
| 76 |
+
llm_council/
|
| 77 |
+
├── requirements.txt ✨ NEW - Dependencies
|
| 78 |
+
├── .env.example ✨ NEW - Environment template
|
| 79 |
+
├── QUICKSTART.md ✨ NEW - Fast setup guide
|
| 80 |
+
├── DEPLOYMENT_GUIDE.md ✨ NEW - Full documentation
|
| 81 |
+
├── CODE_ANALYSIS.md ✨ NEW - Code review
|
| 82 |
+
└── backend/
|
| 83 |
+
├── config_improved.py ✨ NEW - Better model config
|
| 84 |
+
└── openrouter_improved.py ✨ NEW - Enhanced API client
|
| 85 |
+
```
|
| 86 |
+
|
| 87 |
+
## 🚀 How to Use
|
| 88 |
+
|
| 89 |
+
### Option 1: Keep Original + Test Improvements
|
| 90 |
+
|
| 91 |
+
The improved files are separate (`*_improved.py`) so you can:
|
| 92 |
+
1. Test new versions alongside originals
|
| 93 |
+
2. Compare performance
|
| 94 |
+
3. Roll back if needed
|
| 95 |
+
|
| 96 |
+
```bash
|
| 97 |
+
# When ready to use improved versions:
|
| 98 |
+
mv backend/config_improved.py backend/config.py
|
| 99 |
+
mv backend/openrouter_improved.py backend/openrouter.py
|
| 100 |
+
```
|
| 101 |
+
|
| 102 |
+
### Option 2: Deploy to Hugging Face Now
|
| 103 |
+
|
| 104 |
+
1. **Fork existing space** at https://huggingface.co/spaces/burtenshaw/karpathy-llm-council
|
| 105 |
+
2. **Add your API key** in Settings → Repository secrets → `OPENROUTER_API_KEY`
|
| 106 |
+
3. **Optional**: Update to improved models by editing `backend/config.py`
|
| 107 |
+
|
| 108 |
+
See `DEPLOYMENT_GUIDE.md` for step-by-step instructions.
|
| 109 |
+
|
| 110 |
+
## 💰 Cost Comparison
|
| 111 |
+
|
| 112 |
+
| Configuration | Cost/Query | Speed | Quality |
|
| 113 |
+
|--------------|------------|-------|---------|
|
| 114 |
+
| **Budget Council** | $0.01-0.03 | Fast (30-60s) | Good |
|
| 115 |
+
| **Balanced Council** | $0.05-0.15 | Medium (45-90s) | Very Good |
|
| 116 |
+
| **Premium Council** | $0.20-0.50 | Slow (60-135s) | Excellent |
|
| 117 |
+
|
| 118 |
+
## 📊 Architecture Understanding
|
| 119 |
+
|
| 120 |
+
### 3-Stage Process
|
| 121 |
+
|
| 122 |
+
```
|
| 123 |
+
┌─────────────────────────────────────────────┐
|
| 124 |
+
│ USER QUESTION │
|
| 125 |
+
└──────────────┬──────────────────────────────┘
|
| 126 |
+
│
|
| 127 |
+
▼
|
| 128 |
+
┌─────────────────────────────────────────────┐
|
| 129 |
+
│ STAGE 1: Individual Responses (Parallel) │
|
| 130 |
+
│ • DeepSeek answers │
|
| 131 |
+
│ • Claude answers │
|
| 132 |
+
│ • GPT-4o answers │
|
| 133 |
+
│ • Gemini answers │
|
| 134 |
+
│ • QwQ answers │
|
| 135 |
+
└──────────────┬──────────────────────────────┘
|
| 136 |
+
│
|
| 137 |
+
▼
|
| 138 |
+
┌─────────────────────────────────────────────┐
|
| 139 |
+
│ STAGE 2: Peer Rankings (Anonymous) │
|
| 140 |
+
│ • Each model ranks "Response A, B, C..." │
|
| 141 |
+
│ • Aggregate rankings calculated │
|
| 142 |
+
└──────────────┬──────────────────────────────┘
|
| 143 |
+
│
|
| 144 |
+
▼
|
| 145 |
+
┌─────────────────────────────────────────────┐
|
| 146 |
+
│ STAGE 3: Chairman Synthesis │
|
| 147 |
+
│ • DeepSeek Reasoner reviews all │
|
| 148 |
+
│ • Considers responses + rankings │
|
| 149 |
+
│ • Generates final comprehensive answer │
|
| 150 |
+
└─────────────────────────────────────────────┘
|
| 151 |
+
```
|
| 152 |
+
|
| 153 |
+
### Why This Works
|
| 154 |
+
|
| 155 |
+
1. **Stage 1 Diversity**: Different models have different strengths
|
| 156 |
+
2. **Stage 2 Validation**: Anonymous ranking reduces bias
|
| 157 |
+
3. **Stage 3 Synthesis**: Chairman combines best insights
|
| 158 |
+
|
| 159 |
+
## 🎯 Next Steps
|
| 160 |
+
|
| 161 |
+
### Immediate
|
| 162 |
+
1. ✅ Review `QUICKSTART.md` for setup
|
| 163 |
+
2. ✅ Test locally with your API key
|
| 164 |
+
3. ✅ Deploy to HuggingFace Spaces
|
| 165 |
+
|
| 166 |
+
### Short-term
|
| 167 |
+
1. Compare original vs improved models
|
| 168 |
+
2. Monitor costs and performance
|
| 169 |
+
3. Adjust configuration to your needs
|
| 170 |
+
|
| 171 |
+
### Long-term
|
| 172 |
+
1. Add caching for repeated questions
|
| 173 |
+
2. Implement conversation history
|
| 174 |
+
3. Add custom model selection UI
|
| 175 |
+
4. Track quality metrics
|
| 176 |
+
|
| 177 |
+
## 📚 Documentation Map
|
| 178 |
+
|
| 179 |
+
- **`QUICKSTART.md`** → Fast 5-minute setup
|
| 180 |
+
- **`DEPLOYMENT_GUIDE.md`** → Complete deployment guide
|
| 181 |
+
- **`CODE_ANALYSIS.md`** → Detailed code review
|
| 182 |
+
- **`README.md`** → Original project info
|
| 183 |
+
|
| 184 |
+
## ✨ Key Takeaways
|
| 185 |
+
|
| 186 |
+
### What's Good (Original)
|
| 187 |
+
- ✅ Clean architecture
|
| 188 |
+
- ✅ Smart 3-stage design
|
| 189 |
+
- ✅ Async parallel processing
|
| 190 |
+
- ✅ Good Gradio integration
|
| 191 |
+
|
| 192 |
+
### What Was Missing
|
| 193 |
+
- ❌ Error handling & retries
|
| 194 |
+
- ❌ Stable model selection
|
| 195 |
+
- ❌ Configuration flexibility
|
| 196 |
+
- ❌ Deployment documentation
|
| 197 |
+
|
| 198 |
+
### What's Fixed (Improved)
|
| 199 |
+
- ✅ Robust error handling
|
| 200 |
+
- ✅ Latest stable models
|
| 201 |
+
- ✅ Multiple config presets
|
| 202 |
+
- ✅ Comprehensive docs
|
| 203 |
+
|
| 204 |
+
## 🏁 You're Ready!
|
| 205 |
+
|
| 206 |
+
Everything you need is now in your workspace:
|
| 207 |
+
|
| 208 |
+
```bash
|
| 209 |
+
z:\projects\llm_council\
|
| 210 |
+
```
|
| 211 |
+
|
| 212 |
+
**Start here**: Open `QUICKSTART.md` for immediate setup instructions.
|
| 213 |
+
|
| 214 |
+
**Questions?** Check `DEPLOYMENT_GUIDE.md` for comprehensive information.
|
| 215 |
+
|
| 216 |
+
Good luck with your LLM Council! 🚀
|
QUICKSTART.md
ADDED
|
@@ -0,0 +1,149 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# 🚀 Quick Start Guide - LLM Council
|
| 2 |
+
|
| 3 |
+
## 📦 What You Have
|
| 4 |
+
|
| 5 |
+
A sophisticated multi-LLM system where multiple AI models:
|
| 6 |
+
1. **Individually answer** your question
|
| 7 |
+
2. **Rank each other's** responses anonymously
|
| 8 |
+
3. **Synthesize** a final best answer
|
| 9 |
+
|
| 10 |
+
## ⚡ Quick Setup (5 minutes)
|
| 11 |
+
|
| 12 |
+
### 1️⃣ Get OpenRouter API Key
|
| 13 |
+
1. Go to [openrouter.ai](https://openrouter.ai/)
|
| 14 |
+
2. Sign up / Login
|
| 15 |
+
3. Go to Keys → Create new key
|
| 16 |
+
4. Copy your API key
|
| 17 |
+
|
| 18 |
+
### 2️⃣ Set Up Locally
|
| 19 |
+
|
| 20 |
+
```bash
|
| 21 |
+
# Install dependencies
|
| 22 |
+
pip install -r requirements.txt
|
| 23 |
+
|
| 24 |
+
# Create environment file
|
| 25 |
+
cp .env.example .env
|
| 26 |
+
|
| 27 |
+
# Edit .env and add your API key
|
| 28 |
+
# OPENROUTER_API_KEY=your_key_here
|
| 29 |
+
```
|
| 30 |
+
|
| 31 |
+
### 3️⃣ Run It!
|
| 32 |
+
|
| 33 |
+
```bash
|
| 34 |
+
python app.py
|
| 35 |
+
```
|
| 36 |
+
|
| 37 |
+
Visit `http://localhost:7860` 🎉
|
| 38 |
+
|
| 39 |
+
## 🌐 Deploy to Hugging Face Spaces (FREE)
|
| 40 |
+
|
| 41 |
+
### Option A: Fork Existing Space
|
| 42 |
+
1. Visit: https://huggingface.co/spaces/burtenshaw/karpathy-llm-council
|
| 43 |
+
2. Click "⋮" → "Duplicate this Space"
|
| 44 |
+
3. Settings → Repository secrets → Add `OPENROUTER_API_KEY`
|
| 45 |
+
4. Done! Your space will auto-deploy
|
| 46 |
+
|
| 47 |
+
### Option B: Create New Space
|
| 48 |
+
1. Go to [huggingface.co/new-space](https://huggingface.co/new-space)
|
| 49 |
+
2. Choose Gradio SDK 6.0.0
|
| 50 |
+
3. Clone and push your code:
|
| 51 |
+
```bash
|
| 52 |
+
git clone https://huggingface.co/spaces/YOUR_USERNAME/YOUR_SPACE
|
| 53 |
+
cd YOUR_SPACE
|
| 54 |
+
cp -r ../llm_council/* .
|
| 55 |
+
git add .
|
| 56 |
+
git commit -m "Initial commit"
|
| 57 |
+
git push
|
| 58 |
+
```
|
| 59 |
+
4. Settings → Repository secrets → Add `OPENROUTER_API_KEY`
|
| 60 |
+
|
| 61 |
+
## 🎯 Usage Examples
|
| 62 |
+
|
| 63 |
+
### Simple Question
|
| 64 |
+
```
|
| 65 |
+
Question: What is the capital of France?
|
| 66 |
+
⏱️ Response time: ~30 seconds
|
| 67 |
+
💰 Cost: ~$0.01
|
| 68 |
+
```
|
| 69 |
+
|
| 70 |
+
### Complex Analysis
|
| 71 |
+
```
|
| 72 |
+
Question: Compare pros and cons of renewable energy
|
| 73 |
+
⏱️ Response time: ~90 seconds
|
| 74 |
+
💰 Cost: ~$0.07
|
| 75 |
+
```
|
| 76 |
+
|
| 77 |
+
## 🔧 Use Improved Models
|
| 78 |
+
|
| 79 |
+
Replace these files to use latest stable models:
|
| 80 |
+
|
| 81 |
+
```bash
|
| 82 |
+
# Backup originals
|
| 83 |
+
mv backend/config.py backend/config_old.py
|
| 84 |
+
mv backend/openrouter.py backend/openrouter_old.py
|
| 85 |
+
|
| 86 |
+
# Use improved versions
|
| 87 |
+
mv backend/config_improved.py backend/config.py
|
| 88 |
+
mv backend/openrouter_improved.py backend/openrouter.py
|
| 89 |
+
```
|
| 90 |
+
|
| 91 |
+
**Improved models:**
|
| 92 |
+
- DeepSeek V3 (Chat & Reasoner)
|
| 93 |
+
- Claude 3.7 Sonnet
|
| 94 |
+
- GPT-4o
|
| 95 |
+
- Gemini 2.0 Flash Thinking
|
| 96 |
+
- QwQ 32B
|
| 97 |
+
|
| 98 |
+
## 📊 Monitor Usage
|
| 99 |
+
|
| 100 |
+
Check your costs at: [openrouter.ai/activity](https://openrouter.ai/activity)
|
| 101 |
+
|
| 102 |
+
Typical costs:
|
| 103 |
+
- Budget Council: $0.01-0.03 per query
|
| 104 |
+
- Balanced Council: $0.05-0.15 per query
|
| 105 |
+
- Premium Council: $0.20-0.50 per query
|
| 106 |
+
|
| 107 |
+
## ❓ Troubleshooting
|
| 108 |
+
|
| 109 |
+
**"All models failed to respond"**
|
| 110 |
+
- ✅ Check API key in .env
|
| 111 |
+
- ✅ Verify OpenRouter credit balance
|
| 112 |
+
- ✅ Test API key: https://openrouter.ai/playground
|
| 113 |
+
|
| 114 |
+
**Space won't start on HF**
|
| 115 |
+
- ✅ Check logs in Space → Logs tab
|
| 116 |
+
- ✅ Verify secret name is exact: `OPENROUTER_API_KEY`
|
| 117 |
+
- ✅ Ensure requirements.txt is present
|
| 118 |
+
|
| 119 |
+
**Slow responses**
|
| 120 |
+
- ✅ Normal! 3 stages take 45-135 seconds
|
| 121 |
+
- ✅ Use Budget Council for faster results
|
| 122 |
+
- ✅ Reduce number of council members
|
| 123 |
+
|
| 124 |
+
## 📚 Full Documentation
|
| 125 |
+
|
| 126 |
+
- **Complete Guide**: See `DEPLOYMENT_GUIDE.md`
|
| 127 |
+
- **Code Analysis**: See `CODE_ANALYSIS.md`
|
| 128 |
+
- **Original Project**: https://github.com/machine-theory/lm-council
|
| 129 |
+
|
| 130 |
+
## 💡 Tips
|
| 131 |
+
|
| 132 |
+
1. **Start with Budget Council** to test without spending much
|
| 133 |
+
2. **Use Premium Council** for important questions
|
| 134 |
+
3. **Monitor costs** in OpenRouter dashboard
|
| 135 |
+
4. **Set spending limits** to avoid surprises
|
| 136 |
+
|
| 137 |
+
## 🎨 Customization
|
| 138 |
+
|
| 139 |
+
Edit `backend/config.py` to:
|
| 140 |
+
- Change council models
|
| 141 |
+
- Adjust chairman model
|
| 142 |
+
- Modify timeouts
|
| 143 |
+
- Configure retries
|
| 144 |
+
|
| 145 |
+
See `DEPLOYMENT_GUIDE.md` for preset configurations!
|
| 146 |
+
|
| 147 |
+
---
|
| 148 |
+
|
| 149 |
+
**Need Help?** Check `DEPLOYMENT_GUIDE.md` for comprehensive documentation.
|
README.md
CHANGED
|
@@ -9,4 +9,148 @@ app_file: app.py
|
|
| 9 |
pinned: false
|
| 10 |
---
|
| 11 |
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
pinned: false
|
| 10 |
---
|
| 11 |
|
| 12 |
+
# 🏢 LLM Council - Multi-Model AI Deliberation System
|
| 13 |
+
|
| 14 |
+
A sophisticated system where multiple LLMs collaboratively answer questions through a 3-stage deliberation process, inspired by [Andrej Karpathy's LLM Council](https://github.com/machine-theory/lm-council).
|
| 15 |
+
|
| 16 |
+
## 🎯 How It Works
|
| 17 |
+
|
| 18 |
+
1. **Stage 1 - Individual Responses**: 5 different AI models independently answer your question
|
| 19 |
+
2. **Stage 2 - Peer Review**: Each model ranks the anonymized responses from others
|
| 20 |
+
3. **Stage 3 - Synthesis**: A chairman model synthesizes the final answer based on all inputs
|
| 21 |
+
|
| 22 |
+
## 💰 Cost: Mostly FREE!
|
| 23 |
+
|
| 24 |
+
This version uses **FREE HuggingFace Inference API** models:
|
| 25 |
+
- ✅ Meta Llama 3.3 70B (FREE)
|
| 26 |
+
- ✅ Qwen 2.5 72B (FREE)
|
| 27 |
+
- ✅ Mixtral 8x7B (FREE)
|
| 28 |
+
- 💵 OpenAI GPT-4o-mini (low cost)
|
| 29 |
+
- 💵 OpenAI GPT-3.5-turbo (low cost)
|
| 30 |
+
|
| 31 |
+
**Cost per query**: ~$0.01-0.03 (mostly OpenAI, HF is free!)
|
| 32 |
+
|
| 33 |
+
## ⚡ Quick Start
|
| 34 |
+
|
| 35 |
+
### 🚀 Deploy to Hugging Face (Recommended)
|
| 36 |
+
|
| 37 |
+
1. **Fork/Duplicate this Space**
|
| 38 |
+
2. **Add your API keys** in Settings → Repository secrets:
|
| 39 |
+
- `OPENAI_API_KEY` - Get from [OpenAI](https://platform.openai.com/api-keys)
|
| 40 |
+
- `HUGGINGFACE_API_KEY` - Get from [HuggingFace](https://huggingface.co/settings/tokens)
|
| 41 |
+
3. **Done!** Your space will auto-deploy
|
| 42 |
+
|
| 43 |
+
### 💻 Run Locally
|
| 44 |
+
|
| 45 |
+
```bash
|
| 46 |
+
# Clone repository
|
| 47 |
+
git clone https://huggingface.co/spaces/YOUR_USERNAME/YOUR_SPACE_NAME
|
| 48 |
+
cd YOUR_SPACE_NAME
|
| 49 |
+
|
| 50 |
+
# Install dependencies
|
| 51 |
+
pip install -r requirements.txt
|
| 52 |
+
|
| 53 |
+
# Create .env file with your API keys
|
| 54 |
+
cp .env.example .env
|
| 55 |
+
# Edit .env and add:
|
| 56 |
+
# OPENAI_API_KEY=your_openai_key
|
| 57 |
+
# HUGGINGFACE_API_KEY=your_hf_token
|
| 58 |
+
|
| 59 |
+
# Run the app
|
| 60 |
+
python app.py
|
| 61 |
+
```
|
| 62 |
+
|
| 63 |
+
Visit `http://localhost:7860`
|
| 64 |
+
|
| 65 |
+
## 🔑 Getting API Keys
|
| 66 |
+
|
| 67 |
+
### OpenAI API Key (Required)
|
| 68 |
+
1. Go to https://platform.openai.com/api-keys
|
| 69 |
+
2. Create new secret key
|
| 70 |
+
3. Copy and save it (costs ~$0.01-0.03 per query)
|
| 71 |
+
|
| 72 |
+
### HuggingFace Token (Required for FREE models)
|
| 73 |
+
1. Go to https://huggingface.co/settings/tokens
|
| 74 |
+
2. Create new token (read access is enough)
|
| 75 |
+
3. Copy and save it (100% FREE to use!)
|
| 76 |
+
|
| 77 |
+
## 🤖 Council Models
|
| 78 |
+
|
| 79 |
+
### Current Configuration
|
| 80 |
+
- **Meta Llama 3.3 70B** - Excellent reasoning, FREE
|
| 81 |
+
- **Qwen 2.5 72B** - Strong performance, FREE
|
| 82 |
+
- **Mixtral 8x7B** - Mixture of experts, FREE
|
| 83 |
+
- **OpenAI GPT-4o-mini** - Fast & capable, low cost
|
| 84 |
+
- **OpenAI GPT-3.5-turbo** - Reliable, low cost
|
| 85 |
+
|
| 86 |
+
### Chairman
|
| 87 |
+
- **OpenAI GPT-4o-mini** - Excellent synthesis capabilities
|
| 88 |
+
|
| 89 |
+
Want to customize? Edit `backend/config_free.py`!
|
| 90 |
+
|
| 91 |
+
## 📊 Performance
|
| 92 |
+
|
| 93 |
+
- **Response Time**: 60-120 seconds (3 stages, parallel processing)
|
| 94 |
+
- **Quality**: Better than single-model responses
|
| 95 |
+
- **Cost**: ~$0.01-0.03 per query (mostly FREE!)
|
| 96 |
+
- **Reliability**: Automatic retries & error handling
|
| 97 |
+
|
| 98 |
+
## 🛠️ Tech Stack
|
| 99 |
+
|
| 100 |
+
- **Frontend**: Gradio 6.0+ (with MCP server support)
|
| 101 |
+
- **Backend**: Python async/await
|
| 102 |
+
- **APIs**:
|
| 103 |
+
- HuggingFace Inference API (FREE models)
|
| 104 |
+
- OpenAI API (paid models)
|
| 105 |
+
- **Storage**: JSON-based conversation persistence
|
| 106 |
+
|
| 107 |
+
## 📁 Project Structure
|
| 108 |
+
|
| 109 |
+
```
|
| 110 |
+
llm_council/
|
| 111 |
+
├── app.py # Main Gradio interface
|
| 112 |
+
├── requirements.txt # Python dependencies
|
| 113 |
+
├── .env.example # Environment template
|
| 114 |
+
├── backend/
|
| 115 |
+
│ ├── config_free.py # FREE model configuration
|
| 116 |
+
│ ├── api_client.py # HF + OpenAI API client
|
| 117 |
+
│ ├── council_free.py # 3-stage orchestration
|
| 118 |
+
│ ├── storage.py # Conversation storage
|
| 119 |
+
│ └── main.py # FastAPI backend (optional)
|
| 120 |
+
└── docs/
|
| 121 |
+
├── QUICKSTART.md
|
| 122 |
+
├── DEPLOYMENT_GUIDE.md
|
| 123 |
+
└── CODE_ANALYSIS.md
|
| 124 |
+
```
|
| 125 |
+
|
| 126 |
+
## 🔧 Configuration
|
| 127 |
+
|
| 128 |
+
Want different models? Edit `backend/config_free.py`:
|
| 129 |
+
|
| 130 |
+
```python
|
| 131 |
+
# Use ALL FREE models (no OpenAI cost):
|
| 132 |
+
COUNCIL_MODELS = [
|
| 133 |
+
{"id": "meta-llama/Llama-3.3-70B-Instruct", "provider": "huggingface"},
|
| 134 |
+
{"id": "Qwen/Qwen2.5-72B-Instruct", "provider": "huggingface"},
|
| 135 |
+
{"id": "mistralai/Mixtral-8x7B-Instruct-v0.1", "provider": "huggingface"},
|
| 136 |
+
{"id": "google/gemma-2-27b-it", "provider": "huggingface"},
|
| 137 |
+
]
|
| 138 |
+
```
|
| 139 |
+
|
| 140 |
+
## 🤝 Contributing
|
| 141 |
+
|
| 142 |
+
Improvements welcome! See `CODE_ANALYSIS.md` for refactoring suggestions.
|
| 143 |
+
|
| 144 |
+
## 📝 Credits
|
| 145 |
+
|
| 146 |
+
- Original concept: [Machine Theory](https://github.com/machine-theory/lm-council) & [Andrej Karpathy](https://github.com/karpathy)
|
| 147 |
+
- Implementation: Community contributions
|
| 148 |
+
- FREE models: Meta, Qwen, Mistral via HuggingFace
|
| 149 |
+
|
| 150 |
+
## 📄 License
|
| 151 |
+
|
| 152 |
+
See original repository for license information.
|
| 153 |
+
|
| 154 |
+
---
|
| 155 |
+
|
| 156 |
+
**Need Help?** Check the docs folder for detailed guides!
|
app.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
-
from backend.
|
| 3 |
-
from backend.
|
| 4 |
|
| 5 |
|
| 6 |
async def ask_council(question: str, progress=gr.Progress()):
|
|
@@ -19,7 +19,8 @@ async def ask_council(question: str, progress=gr.Progress()):
|
|
| 19 |
Yields:
|
| 20 |
Status updates and finally the synthesized answer.
|
| 21 |
""".format(
|
| 22 |
-
models=", ".join([m.split("/")[-1] for m in COUNCIL_MODELS]),
|
|
|
|
| 23 |
)
|
| 24 |
|
| 25 |
try:
|
|
@@ -90,10 +91,23 @@ async def ask_council(question: str, progress=gr.Progress()):
|
|
| 90 |
|
| 91 |
|
| 92 |
description = """
|
| 93 |
-
An
|
| 94 |
-
and Andrej Karpathy.
|
| 95 |
-
|
| 96 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
"""
|
| 98 |
|
| 99 |
demo = gr.Interface(
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
+
from backend.council_free import stage1_collect_responses, stage2_collect_rankings, stage3_synthesize_final_stream
|
| 3 |
+
from backend.config_free import COUNCIL_MODELS, CHAIRMAN_MODEL
|
| 4 |
|
| 5 |
|
| 6 |
async def ask_council(question: str, progress=gr.Progress()):
|
|
|
|
| 19 |
Yields:
|
| 20 |
Status updates and finally the synthesized answer.
|
| 21 |
""".format(
|
| 22 |
+
models=", ".join([m["id"].split("/")[-1] for m in COUNCIL_MODELS]),
|
| 23 |
+
chairman=CHAIRMAN_MODEL["id"].split("/")[-1]
|
| 24 |
)
|
| 25 |
|
| 26 |
try:
|
|
|
|
| 91 |
|
| 92 |
|
| 93 |
description = """
|
| 94 |
+
An LLM Council that consults multiple AI models to answer questions. Based on [LLM Council](https://github.com/machine-theory/lm-council) by Machine Theory
|
| 95 |
+
and Andrej Karpathy.
|
| 96 |
+
|
| 97 |
+
🎯 **Council Members**: Mix of FREE HuggingFace models + OpenAI models
|
| 98 |
+
- Meta Llama 3.3 70B
|
| 99 |
+
- Qwen 2.5 72B
|
| 100 |
+
- Mixtral 8x7B
|
| 101 |
+
- OpenAI GPT-4o-mini
|
| 102 |
+
- OpenAI GPT-3.5-turbo
|
| 103 |
+
|
| 104 |
+
💡 **How it works**:
|
| 105 |
+
1. Each model answers your question independently
|
| 106 |
+
2. Models rank each other's responses anonymously
|
| 107 |
+
3. Chairman synthesizes the best final answer
|
| 108 |
+
|
| 109 |
+
⏱️ Takes ~1-2 minutes per question (3 stages)
|
| 110 |
+
💰 Uses mostly FREE models!
|
| 111 |
"""
|
| 112 |
|
| 113 |
demo = gr.Interface(
|
backend/api_client.py
ADDED
|
@@ -0,0 +1,355 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""API client for HuggingFace Inference API and OpenAI."""
|
| 2 |
+
|
| 3 |
+
import httpx
|
| 4 |
+
import asyncio
|
| 5 |
+
from typing import List, Dict, Any, Optional
|
| 6 |
+
from .config_free import (
|
| 7 |
+
OPENAI_API_KEY,
|
| 8 |
+
HUGGINGFACE_API_KEY,
|
| 9 |
+
DEFAULT_TIMEOUT,
|
| 10 |
+
MAX_RETRIES,
|
| 11 |
+
RETRY_DELAY
|
| 12 |
+
)
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
async def query_openai_model(
|
| 16 |
+
model: str,
|
| 17 |
+
messages: List[Dict[str, str]],
|
| 18 |
+
timeout: float = DEFAULT_TIMEOUT,
|
| 19 |
+
max_retries: int = MAX_RETRIES
|
| 20 |
+
) -> Optional[Dict[str, Any]]:
|
| 21 |
+
"""
|
| 22 |
+
Query an OpenAI model.
|
| 23 |
+
|
| 24 |
+
Args:
|
| 25 |
+
model: OpenAI model name (e.g., "gpt-4o-mini")
|
| 26 |
+
messages: List of message dicts with 'role' and 'content'
|
| 27 |
+
timeout: Request timeout in seconds
|
| 28 |
+
max_retries: Maximum retry attempts
|
| 29 |
+
|
| 30 |
+
Returns:
|
| 31 |
+
Response dict with 'content', or None if failed
|
| 32 |
+
"""
|
| 33 |
+
headers = {
|
| 34 |
+
"Authorization": f"Bearer {OPENAI_API_KEY}",
|
| 35 |
+
"Content-Type": "application/json",
|
| 36 |
+
}
|
| 37 |
+
|
| 38 |
+
payload = {
|
| 39 |
+
"model": model,
|
| 40 |
+
"messages": messages,
|
| 41 |
+
"temperature": 0.7,
|
| 42 |
+
}
|
| 43 |
+
|
| 44 |
+
for attempt in range(max_retries + 1):
|
| 45 |
+
try:
|
| 46 |
+
async with httpx.AsyncClient(timeout=timeout) as client:
|
| 47 |
+
response = await client.post(
|
| 48 |
+
"https://api.openai.com/v1/chat/completions",
|
| 49 |
+
headers=headers,
|
| 50 |
+
json=payload
|
| 51 |
+
)
|
| 52 |
+
response.raise_for_status()
|
| 53 |
+
|
| 54 |
+
data = response.json()
|
| 55 |
+
content = data["choices"][0]["message"]["content"]
|
| 56 |
+
|
| 57 |
+
return {"content": content}
|
| 58 |
+
|
| 59 |
+
except httpx.TimeoutException as e:
|
| 60 |
+
print(f"⏱️ Timeout querying OpenAI {model} (attempt {attempt + 1}/{max_retries + 1})")
|
| 61 |
+
if attempt < max_retries:
|
| 62 |
+
await asyncio.sleep(RETRY_DELAY * (attempt + 1))
|
| 63 |
+
continue
|
| 64 |
+
return None
|
| 65 |
+
|
| 66 |
+
except httpx.HTTPStatusError as e:
|
| 67 |
+
print(f"🚫 HTTP error querying OpenAI {model}: {e.response.status_code}")
|
| 68 |
+
if 400 <= e.response.status_code < 500:
|
| 69 |
+
return None
|
| 70 |
+
if attempt < max_retries:
|
| 71 |
+
await asyncio.sleep(RETRY_DELAY * (attempt + 1))
|
| 72 |
+
continue
|
| 73 |
+
return None
|
| 74 |
+
|
| 75 |
+
except Exception as e:
|
| 76 |
+
print(f"❌ Error querying OpenAI {model}: {e}")
|
| 77 |
+
if attempt < max_retries:
|
| 78 |
+
await asyncio.sleep(RETRY_DELAY)
|
| 79 |
+
continue
|
| 80 |
+
return None
|
| 81 |
+
|
| 82 |
+
return None
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
async def query_huggingface_model(
|
| 86 |
+
model: str,
|
| 87 |
+
messages: List[Dict[str, str]],
|
| 88 |
+
timeout: float = DEFAULT_TIMEOUT,
|
| 89 |
+
max_retries: int = MAX_RETRIES
|
| 90 |
+
) -> Optional[Dict[str, Any]]:
|
| 91 |
+
"""
|
| 92 |
+
Query a HuggingFace model via Inference API (FREE).
|
| 93 |
+
|
| 94 |
+
Args:
|
| 95 |
+
model: HuggingFace model ID (e.g., "meta-llama/Llama-3.3-70B-Instruct")
|
| 96 |
+
messages: List of message dicts with 'role' and 'content'
|
| 97 |
+
timeout: Request timeout in seconds
|
| 98 |
+
max_retries: Maximum retry attempts
|
| 99 |
+
|
| 100 |
+
Returns:
|
| 101 |
+
Response dict with 'content', or None if failed
|
| 102 |
+
"""
|
| 103 |
+
headers = {
|
| 104 |
+
"Authorization": f"Bearer {HUGGINGFACE_API_KEY}",
|
| 105 |
+
"Content-Type": "application/json",
|
| 106 |
+
}
|
| 107 |
+
|
| 108 |
+
# Convert messages to prompt format for HuggingFace
|
| 109 |
+
prompt = format_messages_for_hf(messages)
|
| 110 |
+
|
| 111 |
+
payload = {
|
| 112 |
+
"inputs": prompt,
|
| 113 |
+
"parameters": {
|
| 114 |
+
"max_new_tokens": 2048,
|
| 115 |
+
"temperature": 0.7,
|
| 116 |
+
"top_p": 0.9,
|
| 117 |
+
"do_sample": True,
|
| 118 |
+
}
|
| 119 |
+
}
|
| 120 |
+
|
| 121 |
+
api_url = f"https://api-inference.huggingface.co/models/{model}"
|
| 122 |
+
|
| 123 |
+
for attempt in range(max_retries + 1):
|
| 124 |
+
try:
|
| 125 |
+
async with httpx.AsyncClient(timeout=timeout) as client:
|
| 126 |
+
response = await client.post(api_url, headers=headers, json=payload)
|
| 127 |
+
response.raise_for_status()
|
| 128 |
+
|
| 129 |
+
data = response.json()
|
| 130 |
+
|
| 131 |
+
# Handle different response formats
|
| 132 |
+
if isinstance(data, list) and len(data) > 0:
|
| 133 |
+
content = data[0].get("generated_text", "")
|
| 134 |
+
# Remove the prompt from the response
|
| 135 |
+
if content.startswith(prompt):
|
| 136 |
+
content = content[len(prompt):].strip()
|
| 137 |
+
elif isinstance(data, dict):
|
| 138 |
+
content = data.get("generated_text", "")
|
| 139 |
+
if content.startswith(prompt):
|
| 140 |
+
content = content[len(prompt):].strip()
|
| 141 |
+
else:
|
| 142 |
+
content = str(data)
|
| 143 |
+
|
| 144 |
+
return {"content": content}
|
| 145 |
+
|
| 146 |
+
except httpx.TimeoutException as e:
|
| 147 |
+
print(f"⏱️ Timeout querying HF {model} (attempt {attempt + 1}/{max_retries + 1})")
|
| 148 |
+
if attempt < max_retries:
|
| 149 |
+
await asyncio.sleep(RETRY_DELAY * (attempt + 1))
|
| 150 |
+
continue
|
| 151 |
+
return None
|
| 152 |
+
|
| 153 |
+
except httpx.HTTPStatusError as e:
|
| 154 |
+
error_msg = e.response.text
|
| 155 |
+
print(f"🚫 HTTP {e.response.status_code} querying HF {model}: {error_msg[:100]}")
|
| 156 |
+
|
| 157 |
+
# Model is loading - retry with longer delay
|
| 158 |
+
if "loading" in error_msg.lower():
|
| 159 |
+
print(f"⏳ Model is loading, waiting 20s...")
|
| 160 |
+
await asyncio.sleep(20)
|
| 161 |
+
if attempt < max_retries:
|
| 162 |
+
continue
|
| 163 |
+
|
| 164 |
+
# Don't retry on client errors (except loading)
|
| 165 |
+
if 400 <= e.response.status_code < 500:
|
| 166 |
+
return None
|
| 167 |
+
|
| 168 |
+
if attempt < max_retries:
|
| 169 |
+
await asyncio.sleep(RETRY_DELAY * (attempt + 1))
|
| 170 |
+
continue
|
| 171 |
+
return None
|
| 172 |
+
|
| 173 |
+
except Exception as e:
|
| 174 |
+
print(f"❌ Error querying HF {model}: {e}")
|
| 175 |
+
if attempt < max_retries:
|
| 176 |
+
await asyncio.sleep(RETRY_DELAY)
|
| 177 |
+
continue
|
| 178 |
+
return None
|
| 179 |
+
|
| 180 |
+
return None
|
| 181 |
+
|
| 182 |
+
|
| 183 |
+
def format_messages_for_hf(messages: List[Dict[str, str]]) -> str:
|
| 184 |
+
"""
|
| 185 |
+
Format chat messages for HuggingFace models.
|
| 186 |
+
|
| 187 |
+
Args:
|
| 188 |
+
messages: List of message dicts with 'role' and 'content'
|
| 189 |
+
|
| 190 |
+
Returns:
|
| 191 |
+
Formatted prompt string
|
| 192 |
+
"""
|
| 193 |
+
# Use common chat template format
|
| 194 |
+
prompt = ""
|
| 195 |
+
for msg in messages:
|
| 196 |
+
role = msg["role"]
|
| 197 |
+
content = msg["content"]
|
| 198 |
+
|
| 199 |
+
if role == "system":
|
| 200 |
+
prompt += f"<|system|>\n{content}\n"
|
| 201 |
+
elif role == "user":
|
| 202 |
+
prompt += f"<|user|>\n{content}\n"
|
| 203 |
+
elif role == "assistant":
|
| 204 |
+
prompt += f"<|assistant|>\n{content}\n"
|
| 205 |
+
|
| 206 |
+
# Add assistant prefix for response
|
| 207 |
+
prompt += "<|assistant|>\n"
|
| 208 |
+
|
| 209 |
+
return prompt
|
| 210 |
+
|
| 211 |
+
|
| 212 |
+
async def query_model(
|
| 213 |
+
model_config: Dict[str, str],
|
| 214 |
+
messages: List[Dict[str, str]],
|
| 215 |
+
timeout: float = DEFAULT_TIMEOUT
|
| 216 |
+
) -> Optional[Dict[str, Any]]:
|
| 217 |
+
"""
|
| 218 |
+
Query a model based on its configuration (provider-agnostic).
|
| 219 |
+
|
| 220 |
+
Args:
|
| 221 |
+
model_config: Dict with 'provider' and 'model' keys
|
| 222 |
+
messages: List of message dicts
|
| 223 |
+
timeout: Request timeout
|
| 224 |
+
|
| 225 |
+
Returns:
|
| 226 |
+
Response dict or None
|
| 227 |
+
"""
|
| 228 |
+
provider = model_config["provider"]
|
| 229 |
+
model = model_config["model"]
|
| 230 |
+
|
| 231 |
+
if provider == "openai":
|
| 232 |
+
return await query_openai_model(model, messages, timeout)
|
| 233 |
+
elif provider == "huggingface":
|
| 234 |
+
return await query_huggingface_model(model, messages, timeout)
|
| 235 |
+
else:
|
| 236 |
+
print(f"❌ Unknown provider: {provider}")
|
| 237 |
+
return None
|
| 238 |
+
|
| 239 |
+
|
| 240 |
+
async def query_model_stream(
|
| 241 |
+
model_config: Dict[str, str],
|
| 242 |
+
messages: List[Dict[str, str]],
|
| 243 |
+
timeout: float = DEFAULT_TIMEOUT
|
| 244 |
+
):
|
| 245 |
+
"""
|
| 246 |
+
Query a model and stream the response.
|
| 247 |
+
|
| 248 |
+
Args:
|
| 249 |
+
model_config: Dict with 'provider' and 'model' keys
|
| 250 |
+
messages: List of message dicts
|
| 251 |
+
timeout: Request timeout
|
| 252 |
+
|
| 253 |
+
Yields:
|
| 254 |
+
Content chunks
|
| 255 |
+
"""
|
| 256 |
+
provider = model_config["provider"]
|
| 257 |
+
model = model_config["model"]
|
| 258 |
+
|
| 259 |
+
if provider == "openai":
|
| 260 |
+
async for chunk in stream_openai_model(model, messages, timeout):
|
| 261 |
+
yield chunk
|
| 262 |
+
elif provider == "huggingface":
|
| 263 |
+
# HF Inference API doesn't support streaming well, fallback to full response
|
| 264 |
+
response = await query_huggingface_model(model, messages, timeout)
|
| 265 |
+
if response:
|
| 266 |
+
yield response["content"]
|
| 267 |
+
else:
|
| 268 |
+
yield "[Error: Failed to get response]"
|
| 269 |
+
else:
|
| 270 |
+
yield f"[Error: Unknown provider {provider}]"
|
| 271 |
+
|
| 272 |
+
|
| 273 |
+
async def stream_openai_model(
|
| 274 |
+
model: str,
|
| 275 |
+
messages: List[Dict[str, str]],
|
| 276 |
+
timeout: float = DEFAULT_TIMEOUT
|
| 277 |
+
):
|
| 278 |
+
"""Stream OpenAI model response."""
|
| 279 |
+
headers = {
|
| 280 |
+
"Authorization": f"Bearer {OPENAI_API_KEY}",
|
| 281 |
+
"Content-Type": "application/json",
|
| 282 |
+
}
|
| 283 |
+
|
| 284 |
+
payload = {
|
| 285 |
+
"model": model,
|
| 286 |
+
"messages": messages,
|
| 287 |
+
"temperature": 0.7,
|
| 288 |
+
"stream": True,
|
| 289 |
+
}
|
| 290 |
+
|
| 291 |
+
import json
|
| 292 |
+
|
| 293 |
+
try:
|
| 294 |
+
async with httpx.AsyncClient(timeout=timeout) as client:
|
| 295 |
+
async with client.stream(
|
| 296 |
+
"POST",
|
| 297 |
+
"https://api.openai.com/v1/chat/completions",
|
| 298 |
+
headers=headers,
|
| 299 |
+
json=payload
|
| 300 |
+
) as response:
|
| 301 |
+
response.raise_for_status()
|
| 302 |
+
async for line in response.aiter_lines():
|
| 303 |
+
if line.startswith("data: "):
|
| 304 |
+
data_str = line[6:]
|
| 305 |
+
if data_str.strip() == "[DONE]":
|
| 306 |
+
break
|
| 307 |
+
try:
|
| 308 |
+
data = json.loads(data_str)
|
| 309 |
+
delta = data["choices"][0]["delta"]
|
| 310 |
+
content = delta.get("content")
|
| 311 |
+
if content:
|
| 312 |
+
yield content
|
| 313 |
+
except (json.JSONDecodeError, KeyError):
|
| 314 |
+
pass
|
| 315 |
+
except Exception as e:
|
| 316 |
+
print(f"❌ Error streaming OpenAI {model}: {e}")
|
| 317 |
+
yield f"\n[Error: {str(e)}]"
|
| 318 |
+
|
| 319 |
+
|
| 320 |
+
async def query_models_parallel(
|
| 321 |
+
model_configs: List[Dict[str, str]],
|
| 322 |
+
messages: List[Dict[str, str]],
|
| 323 |
+
timeout: float = DEFAULT_TIMEOUT
|
| 324 |
+
) -> Dict[str, Optional[Dict[str, Any]]]:
|
| 325 |
+
"""
|
| 326 |
+
Query multiple models in parallel.
|
| 327 |
+
|
| 328 |
+
Args:
|
| 329 |
+
model_configs: List of model config dicts
|
| 330 |
+
messages: Messages to send to each model
|
| 331 |
+
timeout: Request timeout
|
| 332 |
+
|
| 333 |
+
Returns:
|
| 334 |
+
Dict mapping model ID to response
|
| 335 |
+
"""
|
| 336 |
+
print(f"🚀 Querying {len(model_configs)} models in parallel...")
|
| 337 |
+
|
| 338 |
+
tasks = [query_model(config, messages, timeout) for config in model_configs]
|
| 339 |
+
responses = await asyncio.gather(*tasks, return_exceptions=True)
|
| 340 |
+
|
| 341 |
+
result = {}
|
| 342 |
+
for config, response in zip(model_configs, responses):
|
| 343 |
+
model_id = config["id"]
|
| 344 |
+
if isinstance(response, Exception):
|
| 345 |
+
print(f"❌ Model {model_id} raised exception: {response}")
|
| 346 |
+
result[model_id] = None
|
| 347 |
+
else:
|
| 348 |
+
result[model_id] = response
|
| 349 |
+
status = "✅" if response else "❌"
|
| 350 |
+
print(f"{status} Model {model_id} completed")
|
| 351 |
+
|
| 352 |
+
successful = sum(1 for r in result.values() if r is not None)
|
| 353 |
+
print(f"📊 {successful}/{len(model_configs)} models responded successfully")
|
| 354 |
+
|
| 355 |
+
return result
|
backend/config_free.py
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Configuration for LLM Council using FREE HuggingFace models + OpenAI."""
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
from dotenv import load_dotenv
|
| 5 |
+
|
| 6 |
+
load_dotenv()
|
| 7 |
+
|
| 8 |
+
# API Keys
|
| 9 |
+
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
|
| 10 |
+
HUGGINGFACE_API_KEY = os.getenv("HUGGINGFACE_API_KEY") # For Inference API
|
| 11 |
+
|
| 12 |
+
# Council members - Mix of FREE HuggingFace models + OpenAI
|
| 13 |
+
# HuggingFace Inference API provides free access to many models
|
| 14 |
+
COUNCIL_MODELS = [
|
| 15 |
+
# OpenAI models (using your key)
|
| 16 |
+
{
|
| 17 |
+
"id": "openai/gpt-4o-mini",
|
| 18 |
+
"provider": "openai",
|
| 19 |
+
"model": "gpt-4o-mini",
|
| 20 |
+
"description": "OpenAI GPT-4o mini - fast and capable"
|
| 21 |
+
},
|
| 22 |
+
{
|
| 23 |
+
"id": "openai/gpt-3.5-turbo",
|
| 24 |
+
"provider": "openai",
|
| 25 |
+
"model": "gpt-3.5-turbo",
|
| 26 |
+
"description": "OpenAI GPT-3.5 Turbo - reliable"
|
| 27 |
+
},
|
| 28 |
+
|
| 29 |
+
# FREE HuggingFace models via Inference API
|
| 30 |
+
{
|
| 31 |
+
"id": "meta-llama/Llama-3.3-70B-Instruct",
|
| 32 |
+
"provider": "huggingface",
|
| 33 |
+
"model": "meta-llama/Llama-3.3-70B-Instruct",
|
| 34 |
+
"description": "Meta Llama 3.3 70B - excellent reasoning"
|
| 35 |
+
},
|
| 36 |
+
{
|
| 37 |
+
"id": "Qwen/Qwen2.5-72B-Instruct",
|
| 38 |
+
"provider": "huggingface",
|
| 39 |
+
"model": "Qwen/Qwen2.5-72B-Instruct",
|
| 40 |
+
"description": "Qwen 2.5 72B - strong performance"
|
| 41 |
+
},
|
| 42 |
+
{
|
| 43 |
+
"id": "mistralai/Mixtral-8x7B-Instruct-v0.1",
|
| 44 |
+
"provider": "huggingface",
|
| 45 |
+
"model": "mistralai/Mixtral-8x7B-Instruct-v0.1",
|
| 46 |
+
"description": "Mixtral 8x7B - mixture of experts"
|
| 47 |
+
},
|
| 48 |
+
]
|
| 49 |
+
|
| 50 |
+
# Chairman model - Use OpenAI GPT-4o for best synthesis
|
| 51 |
+
CHAIRMAN_MODEL = {
|
| 52 |
+
"id": "openai/gpt-4o-mini",
|
| 53 |
+
"provider": "openai",
|
| 54 |
+
"model": "gpt-4o-mini",
|
| 55 |
+
"description": "OpenAI GPT-4o mini - excellent synthesis"
|
| 56 |
+
}
|
| 57 |
+
|
| 58 |
+
# Alternative configurations
|
| 59 |
+
#
|
| 60 |
+
# ALL FREE (HuggingFace only):
|
| 61 |
+
# COUNCIL_MODELS = [
|
| 62 |
+
# {"id": "meta-llama/Llama-3.3-70B-Instruct", "provider": "huggingface", ...},
|
| 63 |
+
# {"id": "Qwen/Qwen2.5-72B-Instruct", "provider": "huggingface", ...},
|
| 64 |
+
# {"id": "mistralai/Mixtral-8x7B-Instruct-v0.1", "provider": "huggingface", ...},
|
| 65 |
+
# {"id": "google/gemma-2-27b-it", "provider": "huggingface", ...},
|
| 66 |
+
# {"id": "microsoft/Phi-3.5-mini-instruct", "provider": "huggingface", ...},
|
| 67 |
+
# ]
|
| 68 |
+
#
|
| 69 |
+
# PREMIUM (More OpenAI):
|
| 70 |
+
# COUNCIL_MODELS = [
|
| 71 |
+
# {"id": "openai/gpt-4o", "provider": "openai", "model": "gpt-4o", ...},
|
| 72 |
+
# {"id": "openai/gpt-4o-mini", "provider": "openai", "model": "gpt-4o-mini", ...},
|
| 73 |
+
# {"id": "meta-llama/Llama-3.3-70B-Instruct", "provider": "huggingface", ...},
|
| 74 |
+
# {"id": "Qwen/Qwen2.5-72B-Instruct", "provider": "huggingface", ...},
|
| 75 |
+
# ]
|
| 76 |
+
|
| 77 |
+
# Data directory for conversation storage
|
| 78 |
+
DATA_DIR = "data/conversations"
|
| 79 |
+
|
| 80 |
+
# Timeout settings
|
| 81 |
+
DEFAULT_TIMEOUT = 120.0
|
| 82 |
+
CHAIRMAN_TIMEOUT = 180.0
|
| 83 |
+
|
| 84 |
+
# Retry settings
|
| 85 |
+
MAX_RETRIES = 2
|
| 86 |
+
RETRY_DELAY = 2.0
|
backend/config_improved.py
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Configuration for the LLM Council - IMPROVED VERSION."""
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
from dotenv import load_dotenv
|
| 5 |
+
|
| 6 |
+
load_dotenv()
|
| 7 |
+
|
| 8 |
+
# OpenRouter API key
|
| 9 |
+
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
|
| 10 |
+
|
| 11 |
+
# Council members - list of OpenRouter model identifiers
|
| 12 |
+
# IMPROVED: Using latest and most capable models as of late 2024/early 2025
|
| 13 |
+
COUNCIL_MODELS = [
|
| 14 |
+
# DeepSeek V3 - excellent reasoning, cost-effective
|
| 15 |
+
"deepseek/deepseek-chat",
|
| 16 |
+
|
| 17 |
+
# Claude 3.7 Sonnet - strong analytical capabilities
|
| 18 |
+
"anthropic/claude-3.7-sonnet",
|
| 19 |
+
|
| 20 |
+
# GPT-4o - OpenAI's latest multimodal model
|
| 21 |
+
"openai/gpt-4o",
|
| 22 |
+
|
| 23 |
+
# Gemini 2.0 Flash Thinking - Google's fast thinking model
|
| 24 |
+
"google/gemini-2.0-flash-thinking-exp:free",
|
| 25 |
+
|
| 26 |
+
# Qwen QwQ - strong reasoning model
|
| 27 |
+
"qwen/qwq-32b-preview",
|
| 28 |
+
]
|
| 29 |
+
|
| 30 |
+
# Alternative council configurations for different use cases:
|
| 31 |
+
#
|
| 32 |
+
# BUDGET_COUNCIL (faster, cheaper):
|
| 33 |
+
# COUNCIL_MODELS = [
|
| 34 |
+
# "deepseek/deepseek-chat",
|
| 35 |
+
# "google/gemini-2.0-flash-exp:free",
|
| 36 |
+
# "qwen/qwen-2.5-72b-instruct",
|
| 37 |
+
# "meta-llama/llama-3.3-70b-instruct",
|
| 38 |
+
# ]
|
| 39 |
+
#
|
| 40 |
+
# PREMIUM_COUNCIL (best quality, higher cost):
|
| 41 |
+
# COUNCIL_MODELS = [
|
| 42 |
+
# "anthropic/claude-3.7-sonnet",
|
| 43 |
+
# "openai/o1",
|
| 44 |
+
# "google/gemini-exp-1206",
|
| 45 |
+
# "anthropic/claude-3-opus",
|
| 46 |
+
# "x-ai/grok-2-1212",
|
| 47 |
+
# ]
|
| 48 |
+
#
|
| 49 |
+
# REASONING_COUNCIL (focused on complex reasoning):
|
| 50 |
+
# COUNCIL_MODELS = [
|
| 51 |
+
# "openai/o1-mini",
|
| 52 |
+
# "deepseek/deepseek-reasoner",
|
| 53 |
+
# "google/gemini-2.0-flash-thinking-exp:free",
|
| 54 |
+
# "qwen/qwq-32b-preview",
|
| 55 |
+
# ]
|
| 56 |
+
|
| 57 |
+
# Chairman model - synthesizes final response
|
| 58 |
+
# IMPROVED: Using DeepSeek R1 for superior reasoning and synthesis
|
| 59 |
+
CHAIRMAN_MODEL = "deepseek/deepseek-reasoner"
|
| 60 |
+
|
| 61 |
+
# Alternative chairman options:
|
| 62 |
+
# CHAIRMAN_MODEL = "anthropic/claude-3.7-sonnet" # Excellent at synthesis
|
| 63 |
+
# CHAIRMAN_MODEL = "openai/o1" # Best reasoning but slower/expensive
|
| 64 |
+
# CHAIRMAN_MODEL = "google/gemini-exp-1206" # Strong context handling
|
| 65 |
+
|
| 66 |
+
# OpenRouter API endpoint
|
| 67 |
+
OPENROUTER_API_URL = "https://openrouter.ai/api/v1/chat/completions"
|
| 68 |
+
|
| 69 |
+
# Data directory for conversation storage
|
| 70 |
+
DATA_DIR = "data/conversations"
|
| 71 |
+
|
| 72 |
+
# Timeout settings
|
| 73 |
+
DEFAULT_TIMEOUT = 120.0 # seconds
|
| 74 |
+
CHAIRMAN_TIMEOUT = 180.0 # Chairman might need more time for synthesis
|
| 75 |
+
|
| 76 |
+
# Retry settings
|
| 77 |
+
MAX_RETRIES = 2
|
| 78 |
+
RETRY_DELAY = 2.0 # seconds
|
backend/council_free.py
ADDED
|
@@ -0,0 +1,386 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""3-stage LLM Council orchestration using FREE models."""
|
| 2 |
+
|
| 3 |
+
from typing import List, Dict, Any, Tuple
|
| 4 |
+
from .api_client import query_models_parallel, query_model, query_model_stream
|
| 5 |
+
from .config_free import COUNCIL_MODELS, CHAIRMAN_MODEL
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
async def stage1_collect_responses(user_query: str) -> List[Dict[str, Any]]:
|
| 9 |
+
"""
|
| 10 |
+
Stage 1: Collect individual responses from all council models.
|
| 11 |
+
|
| 12 |
+
Args:
|
| 13 |
+
user_query: The user's question
|
| 14 |
+
|
| 15 |
+
Returns:
|
| 16 |
+
List of dicts with 'model' and 'response' keys
|
| 17 |
+
"""
|
| 18 |
+
print("STAGE 1: Collecting individual responses from council members...")
|
| 19 |
+
messages = [{"role": "user", "content": user_query}]
|
| 20 |
+
|
| 21 |
+
# Query all models in parallel
|
| 22 |
+
responses = await query_models_parallel(COUNCIL_MODELS, messages)
|
| 23 |
+
|
| 24 |
+
# Format results
|
| 25 |
+
stage1_results = []
|
| 26 |
+
for model_config in COUNCIL_MODELS:
|
| 27 |
+
model_id = model_config["id"]
|
| 28 |
+
response = responses.get(model_id)
|
| 29 |
+
if response is not None:
|
| 30 |
+
stage1_results.append({
|
| 31 |
+
"model": model_id,
|
| 32 |
+
"response": response.get("content", "")
|
| 33 |
+
})
|
| 34 |
+
|
| 35 |
+
print(f"STAGE 1 COMPLETE: Received {len(stage1_results)} responses.")
|
| 36 |
+
return stage1_results
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
async def stage2_collect_rankings(
|
| 40 |
+
user_query: str, stage1_results: List[Dict[str, Any]]
|
| 41 |
+
) -> Tuple[List[Dict[str, Any]], Dict[str, str]]:
|
| 42 |
+
"""
|
| 43 |
+
Stage 2: Each model ranks the anonymized responses.
|
| 44 |
+
|
| 45 |
+
Args:
|
| 46 |
+
user_query: The original user query
|
| 47 |
+
stage1_results: Results from Stage 1
|
| 48 |
+
|
| 49 |
+
Returns:
|
| 50 |
+
Tuple of (rankings list, label_to_model mapping)
|
| 51 |
+
"""
|
| 52 |
+
print("STAGE 2: Council members are ranking each other's responses...")
|
| 53 |
+
|
| 54 |
+
# Create anonymized labels for responses (Response A, Response B, etc.)
|
| 55 |
+
labels = [chr(65 + i) for i in range(len(stage1_results))] # A, B, C, ...
|
| 56 |
+
|
| 57 |
+
# Create mapping from label to model name
|
| 58 |
+
label_to_model = {
|
| 59 |
+
f"Response {label}": result["model"]
|
| 60 |
+
for label, result in zip(labels, stage1_results)
|
| 61 |
+
}
|
| 62 |
+
|
| 63 |
+
# Build the ranking prompt
|
| 64 |
+
responses_text = "\n\n".join([
|
| 65 |
+
f"Response {label}:\n{result['response']}"
|
| 66 |
+
for label, result in zip(labels, stage1_results)
|
| 67 |
+
])
|
| 68 |
+
|
| 69 |
+
ranking_prompt = f"""You are evaluating different responses to the following question:
|
| 70 |
+
|
| 71 |
+
Question: {user_query}
|
| 72 |
+
|
| 73 |
+
Here are the responses from different models (anonymized):
|
| 74 |
+
|
| 75 |
+
{responses_text}
|
| 76 |
+
|
| 77 |
+
Your task:
|
| 78 |
+
1. First, evaluate each response individually. For each response, explain what it does well and what it does poorly.
|
| 79 |
+
2. Then, at the very end of your response, provide a final ranking.
|
| 80 |
+
|
| 81 |
+
IMPORTANT: Your final ranking MUST be formatted EXACTLY as follows:
|
| 82 |
+
- Start with the line "FINAL RANKING:" (all caps, with colon)
|
| 83 |
+
- Then list the responses from best to worst as a numbered list
|
| 84 |
+
- Each line should be: number, period, space, then ONLY the response label (e.g., "1. Response A")
|
| 85 |
+
- Do not add any other text or explanations in the ranking section
|
| 86 |
+
|
| 87 |
+
Example of the correct format for your ENTIRE response:
|
| 88 |
+
|
| 89 |
+
Response A provides good detail on X but misses Y...
|
| 90 |
+
Response B is accurate but lacks depth on Z...
|
| 91 |
+
Response C offers the most comprehensive answer...
|
| 92 |
+
|
| 93 |
+
FINAL RANKING:
|
| 94 |
+
1. Response C
|
| 95 |
+
2. Response A
|
| 96 |
+
3. Response B
|
| 97 |
+
|
| 98 |
+
Now provide your evaluation and ranking:"""
|
| 99 |
+
|
| 100 |
+
messages = [{"role": "user", "content": ranking_prompt}]
|
| 101 |
+
|
| 102 |
+
# Get rankings from all council models in parallel
|
| 103 |
+
responses = await query_models_parallel(COUNCIL_MODELS, messages)
|
| 104 |
+
|
| 105 |
+
# Format results
|
| 106 |
+
stage2_results = []
|
| 107 |
+
for model_config in COUNCIL_MODELS:
|
| 108 |
+
model_id = model_config["id"]
|
| 109 |
+
response = responses.get(model_id)
|
| 110 |
+
if response is not None:
|
| 111 |
+
full_text = response.get("content", "")
|
| 112 |
+
parsed = parse_ranking_from_text(full_text)
|
| 113 |
+
stage2_results.append({
|
| 114 |
+
"model": model_id,
|
| 115 |
+
"ranking": full_text,
|
| 116 |
+
"parsed_ranking": parsed
|
| 117 |
+
})
|
| 118 |
+
|
| 119 |
+
print("STAGE 2 COMPLETE: Rankings collected.")
|
| 120 |
+
return stage2_results, label_to_model
|
| 121 |
+
|
| 122 |
+
|
| 123 |
+
async def stage3_synthesize_final(
|
| 124 |
+
user_query: str,
|
| 125 |
+
stage1_results: List[Dict[str, Any]],
|
| 126 |
+
stage2_results: List[Dict[str, Any]]
|
| 127 |
+
) -> Dict[str, Any]:
|
| 128 |
+
"""
|
| 129 |
+
Stage 3: Chairman synthesizes final response.
|
| 130 |
+
|
| 131 |
+
Args:
|
| 132 |
+
user_query: The original user query
|
| 133 |
+
stage1_results: Individual model responses from Stage 1
|
| 134 |
+
stage2_results: Rankings from Stage 2
|
| 135 |
+
|
| 136 |
+
Returns:
|
| 137 |
+
Dict with 'model' and 'response' keys
|
| 138 |
+
"""
|
| 139 |
+
print("STAGE 3: Chairman is synthesizing the final answer...")
|
| 140 |
+
|
| 141 |
+
# Build comprehensive context for chairman
|
| 142 |
+
stage1_text = "\n\n".join([
|
| 143 |
+
f"Model: {result['model']}\nResponse: {result['response']}"
|
| 144 |
+
for result in stage1_results
|
| 145 |
+
])
|
| 146 |
+
|
| 147 |
+
stage2_text = "\n\n".join([
|
| 148 |
+
f"Model: {result['model']}\nRanking: {result['ranking']}"
|
| 149 |
+
for result in stage2_results
|
| 150 |
+
])
|
| 151 |
+
|
| 152 |
+
chairman_prompt = f"""You are the Chairman of an LLM Council. Multiple AI models have provided responses to a user's question, and then ranked each other's responses.
|
| 153 |
+
|
| 154 |
+
Original Question: {user_query}
|
| 155 |
+
|
| 156 |
+
STAGE 1 - Individual Responses:
|
| 157 |
+
{stage1_text}
|
| 158 |
+
|
| 159 |
+
STAGE 2 - Peer Rankings:
|
| 160 |
+
{stage2_text}
|
| 161 |
+
|
| 162 |
+
Your task as Chairman is to synthesize all of this information into a single, comprehensive, accurate answer to the user's original question. Consider:
|
| 163 |
+
- The individual responses and their insights
|
| 164 |
+
- The peer rankings and what they reveal about response quality
|
| 165 |
+
- Any patterns of agreement or disagreement
|
| 166 |
+
|
| 167 |
+
Provide a clear, well-reasoned final answer that represents the council's collective wisdom:"""
|
| 168 |
+
|
| 169 |
+
messages = [{"role": "user", "content": chairman_prompt}]
|
| 170 |
+
|
| 171 |
+
# Query the chairman model
|
| 172 |
+
response = await query_model(CHAIRMAN_MODEL, messages)
|
| 173 |
+
|
| 174 |
+
if response is None:
|
| 175 |
+
print("STAGE 3 ERROR: Unable to generate final synthesis.")
|
| 176 |
+
return {
|
| 177 |
+
"model": CHAIRMAN_MODEL["id"],
|
| 178 |
+
"response": "Error: Unable to generate final synthesis."
|
| 179 |
+
}
|
| 180 |
+
|
| 181 |
+
print("STAGE 3 COMPLETE: Final answer synthesized.")
|
| 182 |
+
return {
|
| 183 |
+
"model": CHAIRMAN_MODEL["id"],
|
| 184 |
+
"response": response.get("content", "")
|
| 185 |
+
}
|
| 186 |
+
|
| 187 |
+
|
| 188 |
+
async def stage3_synthesize_final_stream(
|
| 189 |
+
user_query: str,
|
| 190 |
+
stage1_results: List[Dict[str, Any]],
|
| 191 |
+
stage2_results: List[Dict[str, Any]]
|
| 192 |
+
):
|
| 193 |
+
"""
|
| 194 |
+
Stage 3: Chairman synthesizes final response (Streaming).
|
| 195 |
+
Yields chunks of text.
|
| 196 |
+
"""
|
| 197 |
+
print("STAGE 3: Chairman is synthesizing the final answer (Streaming)...")
|
| 198 |
+
|
| 199 |
+
# Build comprehensive context for chairman
|
| 200 |
+
stage1_text = "\n\n".join([
|
| 201 |
+
f"Model: {result['model']}\nResponse: {result['response']}"
|
| 202 |
+
for result in stage1_results
|
| 203 |
+
])
|
| 204 |
+
|
| 205 |
+
stage2_text = "\n\n".join([
|
| 206 |
+
f"Model: {result['model']}\nRanking: {result['ranking']}"
|
| 207 |
+
for result in stage2_results
|
| 208 |
+
])
|
| 209 |
+
|
| 210 |
+
chairman_prompt = f"""You are the Chairman of an LLM Council. Multiple AI models have provided responses to a user's question, and then ranked each other's responses.
|
| 211 |
+
|
| 212 |
+
Original Question: {user_query}
|
| 213 |
+
|
| 214 |
+
STAGE 1 - Individual Responses:
|
| 215 |
+
{stage1_text}
|
| 216 |
+
|
| 217 |
+
STAGE 2 - Peer Rankings:
|
| 218 |
+
{stage2_text}
|
| 219 |
+
|
| 220 |
+
Your task as Chairman is to synthesize all of this information into a single, comprehensive, accurate answer to the user's original question. Consider:
|
| 221 |
+
- The individual responses and their insights
|
| 222 |
+
- The peer rankings and what they reveal about response quality
|
| 223 |
+
- Any patterns of agreement or disagreement
|
| 224 |
+
|
| 225 |
+
Provide a clear, well-reasoned final answer that represents the council's collective wisdom:"""
|
| 226 |
+
|
| 227 |
+
messages = [{"role": "user", "content": chairman_prompt}]
|
| 228 |
+
|
| 229 |
+
# Stream the chairman model
|
| 230 |
+
async for chunk in query_model_stream(CHAIRMAN_MODEL, messages):
|
| 231 |
+
yield chunk
|
| 232 |
+
|
| 233 |
+
print("STAGE 3 COMPLETE: Final answer stream finished.")
|
| 234 |
+
|
| 235 |
+
|
| 236 |
+
def parse_ranking_from_text(ranking_text: str) -> List[str]:
|
| 237 |
+
"""
|
| 238 |
+
Parse the FINAL RANKING section from the model's response.
|
| 239 |
+
|
| 240 |
+
Args:
|
| 241 |
+
ranking_text: The full text response from the model
|
| 242 |
+
|
| 243 |
+
Returns:
|
| 244 |
+
List of response labels in ranked order
|
| 245 |
+
"""
|
| 246 |
+
import re
|
| 247 |
+
|
| 248 |
+
# Look for "FINAL RANKING:" section
|
| 249 |
+
if "FINAL RANKING:" in ranking_text:
|
| 250 |
+
parts = ranking_text.split("FINAL RANKING:")
|
| 251 |
+
if len(parts) >= 2:
|
| 252 |
+
ranking_section = parts[1]
|
| 253 |
+
# Extract numbered list format
|
| 254 |
+
numbered_matches = re.findall(r"\d+\.\s*Response [A-Z]", ranking_section)
|
| 255 |
+
if numbered_matches:
|
| 256 |
+
return [re.search(r"Response [A-Z]", m).group() for m in numbered_matches]
|
| 257 |
+
|
| 258 |
+
# Fallback: Extract all "Response X" patterns in order
|
| 259 |
+
matches = re.findall(r"Response [A-Z]", ranking_section)
|
| 260 |
+
return matches
|
| 261 |
+
|
| 262 |
+
# Fallback: try to find any "Response X" patterns in order
|
| 263 |
+
matches = re.findall(r"Response [A-Z]", ranking_text)
|
| 264 |
+
return matches
|
| 265 |
+
|
| 266 |
+
|
| 267 |
+
def calculate_aggregate_rankings(
|
| 268 |
+
stage2_results: List[Dict[str, Any]],
|
| 269 |
+
label_to_model: Dict[str, str]
|
| 270 |
+
) -> List[Dict[str, Any]]:
|
| 271 |
+
"""
|
| 272 |
+
Calculate aggregate rankings across all models.
|
| 273 |
+
|
| 274 |
+
Args:
|
| 275 |
+
stage2_results: Rankings from each model
|
| 276 |
+
label_to_model: Mapping from anonymous labels to model names
|
| 277 |
+
|
| 278 |
+
Returns:
|
| 279 |
+
List of dicts with model name and average rank, sorted best to worst
|
| 280 |
+
"""
|
| 281 |
+
from collections import defaultdict
|
| 282 |
+
|
| 283 |
+
# Track positions for each model
|
| 284 |
+
model_positions = defaultdict(list)
|
| 285 |
+
|
| 286 |
+
for ranking in stage2_results:
|
| 287 |
+
ranking_text = ranking["ranking"]
|
| 288 |
+
parsed_ranking = parse_ranking_from_text(ranking_text)
|
| 289 |
+
|
| 290 |
+
for position, label in enumerate(parsed_ranking, start=1):
|
| 291 |
+
if label in label_to_model:
|
| 292 |
+
model_name = label_to_model[label]
|
| 293 |
+
model_positions[model_name].append(position)
|
| 294 |
+
|
| 295 |
+
# Calculate average position for each model
|
| 296 |
+
aggregate = []
|
| 297 |
+
for model, positions in model_positions.items():
|
| 298 |
+
if positions:
|
| 299 |
+
avg_rank = sum(positions) / len(positions)
|
| 300 |
+
aggregate.append({
|
| 301 |
+
"model": model,
|
| 302 |
+
"average_rank": round(avg_rank, 2),
|
| 303 |
+
"rankings_count": len(positions)
|
| 304 |
+
})
|
| 305 |
+
|
| 306 |
+
# Sort by average rank (lower is better)
|
| 307 |
+
aggregate.sort(key=lambda x: x["average_rank"])
|
| 308 |
+
|
| 309 |
+
return aggregate
|
| 310 |
+
|
| 311 |
+
|
| 312 |
+
async def generate_conversation_title(user_query: str) -> str:
|
| 313 |
+
"""
|
| 314 |
+
Generate a short title for a conversation based on the first user message.
|
| 315 |
+
|
| 316 |
+
Args:
|
| 317 |
+
user_query: The first user message
|
| 318 |
+
|
| 319 |
+
Returns:
|
| 320 |
+
A short title (3-5 words)
|
| 321 |
+
"""
|
| 322 |
+
title_prompt = f"""Generate a very short title (3-5 words maximum) that summarizes the following question.
|
| 323 |
+
The title should be concise and descriptive. Do not use quotes or punctuation in the title.
|
| 324 |
+
|
| 325 |
+
Question: {user_query}
|
| 326 |
+
|
| 327 |
+
Title:"""
|
| 328 |
+
|
| 329 |
+
messages = [{"role": "user", "content": title_prompt}]
|
| 330 |
+
|
| 331 |
+
# Use GPT-4o-mini for fast title generation
|
| 332 |
+
response = await query_model(CHAIRMAN_MODEL, messages, timeout=30.0)
|
| 333 |
+
|
| 334 |
+
if response is None:
|
| 335 |
+
return "New Conversation"
|
| 336 |
+
|
| 337 |
+
title = response.get("content", "New Conversation").strip()
|
| 338 |
+
title = title.strip("\"'")
|
| 339 |
+
|
| 340 |
+
# Truncate if too long
|
| 341 |
+
if len(title) > 50:
|
| 342 |
+
title = title[:47] + "..."
|
| 343 |
+
|
| 344 |
+
return title
|
| 345 |
+
|
| 346 |
+
|
| 347 |
+
async def run_full_council(user_query: str) -> Tuple[List, List, Dict, Dict]:
|
| 348 |
+
"""
|
| 349 |
+
Run the complete 3-stage council process.
|
| 350 |
+
|
| 351 |
+
Args:
|
| 352 |
+
user_query: The user's question
|
| 353 |
+
|
| 354 |
+
Returns:
|
| 355 |
+
Tuple of (stage1_results, stage2_results, stage3_result, metadata)
|
| 356 |
+
"""
|
| 357 |
+
# Stage 1: Collect individual responses
|
| 358 |
+
stage1_results = await stage1_collect_responses(user_query)
|
| 359 |
+
|
| 360 |
+
# If no models responded successfully, return error
|
| 361 |
+
if not stage1_results:
|
| 362 |
+
return [], [], {
|
| 363 |
+
"model": "error",
|
| 364 |
+
"response": "All models failed to respond. Please try again."
|
| 365 |
+
}, {}
|
| 366 |
+
|
| 367 |
+
# Stage 2: Collect rankings
|
| 368 |
+
stage2_results, label_to_model = await stage2_collect_rankings(
|
| 369 |
+
user_query, stage1_results
|
| 370 |
+
)
|
| 371 |
+
|
| 372 |
+
# Calculate aggregate rankings
|
| 373 |
+
aggregate_rankings = calculate_aggregate_rankings(stage2_results, label_to_model)
|
| 374 |
+
|
| 375 |
+
# Stage 3: Synthesize final answer
|
| 376 |
+
stage3_result = await stage3_synthesize_final(
|
| 377 |
+
user_query, stage1_results, stage2_results
|
| 378 |
+
)
|
| 379 |
+
|
| 380 |
+
# Prepare metadata
|
| 381 |
+
metadata = {
|
| 382 |
+
"label_to_model": label_to_model,
|
| 383 |
+
"aggregate_rankings": aggregate_rankings
|
| 384 |
+
}
|
| 385 |
+
|
| 386 |
+
return stage1_results, stage2_results, stage3_result, metadata
|
backend/openrouter_improved.py
ADDED
|
@@ -0,0 +1,192 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""OpenRouter API client with improved error handling and retry logic."""
|
| 2 |
+
|
| 3 |
+
import httpx
|
| 4 |
+
import asyncio
|
| 5 |
+
from typing import List, Dict, Any, Optional
|
| 6 |
+
from .config_improved import (
|
| 7 |
+
OPENROUTER_API_KEY,
|
| 8 |
+
OPENROUTER_API_URL,
|
| 9 |
+
DEFAULT_TIMEOUT,
|
| 10 |
+
MAX_RETRIES,
|
| 11 |
+
RETRY_DELAY
|
| 12 |
+
)
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
async def query_model(
|
| 16 |
+
model: str,
|
| 17 |
+
messages: List[Dict[str, str]],
|
| 18 |
+
timeout: float = DEFAULT_TIMEOUT,
|
| 19 |
+
max_retries: int = MAX_RETRIES
|
| 20 |
+
) -> Optional[Dict[str, Any]]:
|
| 21 |
+
"""
|
| 22 |
+
Query a single model via OpenRouter API with retry logic.
|
| 23 |
+
|
| 24 |
+
Args:
|
| 25 |
+
model: OpenRouter model identifier (e.g., "openai/gpt-4o")
|
| 26 |
+
messages: List of message dicts with 'role' and 'content'
|
| 27 |
+
timeout: Request timeout in seconds
|
| 28 |
+
max_retries: Maximum number of retry attempts
|
| 29 |
+
|
| 30 |
+
Returns:
|
| 31 |
+
Response dict with 'content' and optional 'reasoning_details', or None if failed
|
| 32 |
+
"""
|
| 33 |
+
headers = {
|
| 34 |
+
"Authorization": f"Bearer {OPENROUTER_API_KEY}",
|
| 35 |
+
"Content-Type": "application/json",
|
| 36 |
+
"HTTP-Referer": "https://huggingface.co/spaces/burtenshaw/karpathy-llm-council",
|
| 37 |
+
"X-Title": "LLM Council",
|
| 38 |
+
}
|
| 39 |
+
|
| 40 |
+
payload = {
|
| 41 |
+
"model": model,
|
| 42 |
+
"messages": messages,
|
| 43 |
+
}
|
| 44 |
+
|
| 45 |
+
for attempt in range(max_retries + 1):
|
| 46 |
+
try:
|
| 47 |
+
async with httpx.AsyncClient(timeout=timeout) as client:
|
| 48 |
+
response = await client.post(OPENROUTER_API_URL, headers=headers, json=payload)
|
| 49 |
+
response.raise_for_status()
|
| 50 |
+
|
| 51 |
+
data = response.json()
|
| 52 |
+
message = data["choices"][0]["message"]
|
| 53 |
+
|
| 54 |
+
return {
|
| 55 |
+
"content": message.get("content"),
|
| 56 |
+
"reasoning_details": message.get("reasoning_details")
|
| 57 |
+
}
|
| 58 |
+
|
| 59 |
+
except httpx.TimeoutException as e:
|
| 60 |
+
print(f"⏱️ Timeout querying model {model} (attempt {attempt + 1}/{max_retries + 1}): {e}")
|
| 61 |
+
if attempt < max_retries:
|
| 62 |
+
await asyncio.sleep(RETRY_DELAY * (attempt + 1)) # Exponential backoff
|
| 63 |
+
continue
|
| 64 |
+
return None
|
| 65 |
+
|
| 66 |
+
except httpx.HTTPStatusError as e:
|
| 67 |
+
print(f"🚫 HTTP error querying model {model}: {e.response.status_code} - {e.response.text}")
|
| 68 |
+
# Don't retry on 4xx errors (client errors)
|
| 69 |
+
if 400 <= e.response.status_code < 500:
|
| 70 |
+
return None
|
| 71 |
+
# Retry on 5xx errors (server errors)
|
| 72 |
+
if attempt < max_retries:
|
| 73 |
+
await asyncio.sleep(RETRY_DELAY * (attempt + 1))
|
| 74 |
+
continue
|
| 75 |
+
return None
|
| 76 |
+
|
| 77 |
+
except Exception as e:
|
| 78 |
+
print(f"❌ Error querying model {model} (attempt {attempt + 1}/{max_retries + 1}): {e}")
|
| 79 |
+
if attempt < max_retries:
|
| 80 |
+
await asyncio.sleep(RETRY_DELAY)
|
| 81 |
+
continue
|
| 82 |
+
return None
|
| 83 |
+
|
| 84 |
+
return None
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
async def query_model_stream(
|
| 88 |
+
model: str,
|
| 89 |
+
messages: List[Dict[str, str]],
|
| 90 |
+
timeout: float = DEFAULT_TIMEOUT
|
| 91 |
+
):
|
| 92 |
+
"""
|
| 93 |
+
Query a model via OpenRouter API and stream the response.
|
| 94 |
+
Yields content chunks as they arrive.
|
| 95 |
+
|
| 96 |
+
Args:
|
| 97 |
+
model: OpenRouter model identifier
|
| 98 |
+
messages: List of message dicts with 'role' and 'content'
|
| 99 |
+
timeout: Request timeout in seconds
|
| 100 |
+
|
| 101 |
+
Yields:
|
| 102 |
+
Content chunks as strings
|
| 103 |
+
"""
|
| 104 |
+
headers = {
|
| 105 |
+
"Authorization": f"Bearer {OPENROUTER_API_KEY}",
|
| 106 |
+
"Content-Type": "application/json",
|
| 107 |
+
"HTTP-Referer": "https://huggingface.co/spaces/burtenshaw/karpathy-llm-council",
|
| 108 |
+
"X-Title": "LLM Council",
|
| 109 |
+
}
|
| 110 |
+
|
| 111 |
+
payload = {
|
| 112 |
+
"model": model,
|
| 113 |
+
"messages": messages,
|
| 114 |
+
"stream": True
|
| 115 |
+
}
|
| 116 |
+
|
| 117 |
+
import json
|
| 118 |
+
|
| 119 |
+
try:
|
| 120 |
+
async with httpx.AsyncClient(timeout=timeout) as client:
|
| 121 |
+
async with client.stream("POST", OPENROUTER_API_URL, headers=headers, json=payload) as response:
|
| 122 |
+
response.raise_for_status()
|
| 123 |
+
async for line in response.aiter_lines():
|
| 124 |
+
if line.startswith("data: "):
|
| 125 |
+
data_str = line[6:]
|
| 126 |
+
if data_str.strip() == "[DONE]":
|
| 127 |
+
break
|
| 128 |
+
try:
|
| 129 |
+
data = json.loads(data_str)
|
| 130 |
+
delta = data["choices"][0]["delta"]
|
| 131 |
+
content = delta.get("content")
|
| 132 |
+
if content:
|
| 133 |
+
yield content
|
| 134 |
+
except json.JSONDecodeError:
|
| 135 |
+
pass
|
| 136 |
+
except KeyError:
|
| 137 |
+
pass
|
| 138 |
+
|
| 139 |
+
except httpx.TimeoutException as e:
|
| 140 |
+
print(f"⏱️ Timeout streaming model {model}: {e}")
|
| 141 |
+
yield f"\n\n[Error: Request timed out after {timeout}s]"
|
| 142 |
+
|
| 143 |
+
except httpx.HTTPStatusError as e:
|
| 144 |
+
print(f"🚫 HTTP error streaming model {model}: {e.response.status_code}")
|
| 145 |
+
yield f"\n\n[Error: HTTP {e.response.status_code}]"
|
| 146 |
+
|
| 147 |
+
except Exception as e:
|
| 148 |
+
print(f"❌ Error streaming model {model}: {e}")
|
| 149 |
+
yield f"\n\n[Error: {str(e)}]"
|
| 150 |
+
|
| 151 |
+
|
| 152 |
+
async def query_models_parallel(
|
| 153 |
+
models: List[str],
|
| 154 |
+
messages: List[Dict[str, str]],
|
| 155 |
+
timeout: float = DEFAULT_TIMEOUT
|
| 156 |
+
) -> Dict[str, Optional[Dict[str, Any]]]:
|
| 157 |
+
"""
|
| 158 |
+
Query multiple models in parallel with individual error handling.
|
| 159 |
+
|
| 160 |
+
Args:
|
| 161 |
+
models: List of OpenRouter model identifiers
|
| 162 |
+
messages: List of message dicts to send to each model
|
| 163 |
+
timeout: Request timeout in seconds
|
| 164 |
+
|
| 165 |
+
Returns:
|
| 166 |
+
Dict mapping model identifier to response dict (or None if failed)
|
| 167 |
+
"""
|
| 168 |
+
import asyncio
|
| 169 |
+
|
| 170 |
+
print(f"🚀 Querying {len(models)} models in parallel...")
|
| 171 |
+
|
| 172 |
+
# Create tasks for all models
|
| 173 |
+
tasks = [query_model(model, messages, timeout=timeout) for model in models]
|
| 174 |
+
|
| 175 |
+
# Wait for all to complete
|
| 176 |
+
responses = await asyncio.gather(*tasks, return_exceptions=True)
|
| 177 |
+
|
| 178 |
+
# Map models to their responses, handling exceptions
|
| 179 |
+
result = {}
|
| 180 |
+
for model, response in zip(models, responses):
|
| 181 |
+
if isinstance(response, Exception):
|
| 182 |
+
print(f"❌ Model {model} raised exception: {response}")
|
| 183 |
+
result[model] = None
|
| 184 |
+
else:
|
| 185 |
+
result[model] = response
|
| 186 |
+
status = "✅" if response else "❌"
|
| 187 |
+
print(f"{status} Model {model} completed")
|
| 188 |
+
|
| 189 |
+
successful = sum(1 for r in result.values() if r is not None)
|
| 190 |
+
print(f"📊 {successful}/{len(models)} models responded successfully")
|
| 191 |
+
|
| 192 |
+
return result
|
requirements.txt
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Core dependencies
|
| 2 |
+
gradio>=6.0.0
|
| 3 |
+
httpx>=0.27.0
|
| 4 |
+
python-dotenv>=1.0.0
|
| 5 |
+
openai>=1.0.0
|
| 6 |
+
|
| 7 |
+
# FastAPI backend (optional - for REST API)
|
| 8 |
+
fastapi>=0.115.0
|
| 9 |
+
uvicorn>=0.30.0
|
| 10 |
+
pydantic>=2.0.0
|
| 11 |
+
|