Spaces:

zade-frontier
/

andrej-karpathy-llm-council

Running

andrej-karpathy-llm-council / backend /config_improved.py

Krishna Chaitanya Cheedella

Refactor to use FREE HuggingFace models + OpenAI instead of OpenRouter

aa61236 13 days ago

2.29 kB

	"""Configuration for the LLM Council - IMPROVED VERSION."""

	import os
	from dotenv import load_dotenv

	# Load variables from a .env file (if one is found) into the process
	# environment. This has to run before the os.getenv() lookup further down,
	# otherwise a key defined only in .env would not be visible to it.
	load_dotenv()

	# Credential for the OpenRouter API, taken from the process environment.
	# Evaluates to None when the OPENROUTER_API_KEY variable is not set.
	OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY")

	# Council roster: one OpenRouter model identifier per council member.
	# IMPROVED: latest and most capable models as of late 2024/early 2025.
	COUNCIL_MODELS = [
	    "deepseek/deepseek-chat",  # DeepSeek V3 - excellent reasoning, cost-effective
	    "anthropic/claude-3.7-sonnet",  # Claude 3.7 Sonnet - strong analytical capabilities
	    "openai/gpt-4o",  # GPT-4o - OpenAI's latest multimodal model
	    "google/gemini-2.0-flash-thinking-exp:free",  # Gemini 2.0 Flash Thinking - Google's fast thinking model
	    "qwen/qwq-32b-preview",  # Qwen QwQ - strong reasoning model
	]

	# Alternative council configurations for different use cases:
	#
	# BUDGET_COUNCIL (faster, cheaper):
	# COUNCIL_MODELS = [
	# "deepseek/deepseek-chat",
	# "google/gemini-2.0-flash-exp:free",
	# "qwen/qwen-2.5-72b-instruct",
	# "meta-llama/llama-3.3-70b-instruct",
	# ]
	#
	# PREMIUM_COUNCIL (best quality, higher cost):
	# COUNCIL_MODELS = [
	# "anthropic/claude-3.7-sonnet",
	# "openai/o1",
	# "google/gemini-exp-1206",
	# "anthropic/claude-3-opus",
	# "x-ai/grok-2-1212",
	# ]
	#
	# REASONING_COUNCIL (focused on complex reasoning):
	# COUNCIL_MODELS = [
	# "openai/o1-mini",
	# "deepseek/deepseek-r1",
	# "google/gemini-2.0-flash-thinking-exp:free",
	# "qwen/qwq-32b-preview",
	# ]

	# Chairman model - synthesizes the final response.
	# IMPROVED: Using DeepSeek R1 for superior reasoning and synthesis.
	# NOTE: OpenRouter lists R1 under the slug "deepseek/deepseek-r1".
	# "deepseek-reasoner" is the model name on DeepSeek's own API, not an
	# OpenRouter identifier, so requests using it are rejected as an unknown model.
	CHAIRMAN_MODEL = "deepseek/deepseek-r1"

	# Alternative chairman options:
	# CHAIRMAN_MODEL = "anthropic/claude-3.7-sonnet" # Excellent at synthesis
	# CHAIRMAN_MODEL = "openai/o1" # Best reasoning but slower/expensive
	# CHAIRMAN_MODEL = "google/gemini-exp-1206" # Strong context handling

	# Chat-completions endpoint of the OpenRouter API.
	OPENROUTER_API_URL: str = "https://openrouter.ai/api/v1/chat/completions"

	# Directory used for conversation storage.
	DATA_DIR: str = "data/conversations"

	# Timeout settings, in seconds. The chairman gets a larger value because
	# it might need more time for synthesis.
	DEFAULT_TIMEOUT: float = 120.0
	CHAIRMAN_TIMEOUT: float = 180.0

	# Retry settings; the delay is expressed in seconds.
	MAX_RETRIES: int = 2
	RETRY_DELAY: float = 2.0