import os
import asyncio


_llm_config = {
    'provider': None,
    'model': None
}


def setup_llm_fallback():
    """Set up the LLM provider fallback chain.

    Providers are tried in order of preference; the first one whose
    API key is present in the environment is selected.
    """
    # Try OpenAI first
    if os.getenv('OPENAI_API_KEY'):
        _llm_config['provider'] = 'openai'
        _llm_config['model'] = 'gpt-4o-mini'
        return
    # Fall back to Groq
    if os.getenv('GROQ_API_KEY'):
        _llm_config['provider'] = 'groq'
        _llm_config['model'] = 'llama-3.3-70b-versatile'
        return
    # Fall back to Hyperbolic
    if os.getenv('HYPERBOLIC_API_KEY'):
        _llm_config['provider'] = 'hyperbolic'
        _llm_config['model'] = 'meta-llama/Llama-3.3-70B-Instruct'
        return
    # Last resort: Hugging Face Inference API
    if os.getenv('HF_TOKEN'):
        _llm_config['provider'] = 'huggingface'
        _llm_config['model'] = 'mistralai/Mixtral-8x7B-Instruct-v0.1'
        return
    raise ValueError(
        "No LLM API keys configured. Please set at least one of: "
        "OPENAI_API_KEY, GROQ_API_KEY, HYPERBOLIC_API_KEY, HF_TOKEN"
    )


async def get_llm_response(
    prompt: str,
    temperature: float = 0.7,
    max_tokens: int = 2000
) -> str:
    """
    Get LLM response using fallback chain.

    Args:
        prompt: Input prompt
        temperature: Sampling temperature
        max_tokens: Maximum tokens to generate

    Returns:
        LLM response text
    """
    provider = _llm_config.get('provider')
    model = _llm_config.get('model')
    if not provider:
        setup_llm_fallback()
        provider = _llm_config.get('provider')
        model = _llm_config.get('model')
    try:
        if provider == 'openai':
            return await _call_openai(prompt, model, temperature, max_tokens)
        elif provider == 'groq':
            return await _call_groq(prompt, model, temperature, max_tokens)
        elif provider == 'hyperbolic':
            return await _call_hyperbolic(prompt, model, temperature, max_tokens)
        elif provider == 'huggingface':
            return await _call_huggingface(prompt, model, temperature, max_tokens)
        else:
            raise ValueError(f"Unknown LLM provider: {provider}")
    except Exception as e:
        print(f"Error with {provider}: {e}")
        # Try the next provider in the chain. The model must be updated
        # along with the provider, otherwise the retry would send the
        # failed provider's model name to the new provider.
        if provider == 'openai' and os.getenv('GROQ_API_KEY'):
            _llm_config['provider'] = 'groq'
            _llm_config['model'] = 'llama-3.3-70b-versatile'
            return await get_llm_response(prompt, temperature, max_tokens)
        raise


async def _call_openai(prompt: str, model: str, temperature: float, max_tokens: int) -> str:
    """Call the OpenAI API."""
    from openai import AsyncOpenAI

    client = AsyncOpenAI(api_key=os.getenv('OPENAI_API_KEY'))
    response = await client.chat.completions.create(
        model=model,
        messages=[{'role': 'user', 'content': prompt}],
        temperature=temperature,
        max_tokens=max_tokens
    )
    return response.choices[0].message.content


async def _call_groq(prompt: str, model: str, temperature: float, max_tokens: int) -> str:
    """Call the Groq API."""
    from groq import AsyncGroq

    client = AsyncGroq(api_key=os.getenv('GROQ_API_KEY'))
    response = await client.chat.completions.create(
        model=model,
        messages=[{'role': 'user', 'content': prompt}],
        temperature=temperature,
        max_tokens=max_tokens
    )
    return response.choices[0].message.content


async def _call_hyperbolic(prompt: str, model: str, temperature: float, max_tokens: int) -> str:
    """Call the Hyperbolic API (OpenAI-compatible chat completions endpoint)."""
    import aiohttp

    url = "https://api.hyperbolic.xyz/v1/chat/completions"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {os.getenv('HYPERBOLIC_API_KEY')}"
    }
    data = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "temperature": temperature,
        "max_tokens": max_tokens
    }
    async with aiohttp.ClientSession() as session:
        async with session.post(url, headers=headers, json=data) as response:
            # Surface HTTP errors so the caller's fallback logic can react,
            # instead of failing later with a KeyError on an error body.
            response.raise_for_status()
            result = await response.json()
            return result['choices'][0]['message']['content']


async def _call_huggingface(prompt: str, model: str, temperature: float, max_tokens: int) -> str:
    """Call the Hugging Face Inference API."""
    import aiohttp

    url = f"https://api-inference.huggingface.co/models/{model}"
    headers = {"Authorization": f"Bearer {os.getenv('HF_TOKEN')}"}
    data = {
        "inputs": prompt,
        "parameters": {
            "temperature": temperature,
            "max_new_tokens": max_tokens,
            "return_full_text": False
        }
    }
    async with aiohttp.ClientSession() as session:
        async with session.post(url, headers=headers, json=data) as response:
            response.raise_for_status()
            result = await response.json()
            # Text-generation endpoints return a list of generations;
            # anything else is stringified as a last resort.
            if isinstance(result, list) and len(result) > 0:
                return result[0].get('generated_text', '')
            return str(result)
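

# --- Usage sketch (illustrative, not part of the original module) ---
# A minimal example of driving the fallback chain, assuming this file is run
# directly and at least one of the API keys checked above is set in the
# environment. The prompt text and token limit are arbitrary examples.
if __name__ == '__main__':
    async def _demo() -> None:
        setup_llm_fallback()
        print(f"Using provider: {_llm_config['provider']} ({_llm_config['model']})")
        reply = await get_llm_response("Say hello in one sentence.", max_tokens=50)
        print(reply)

    asyncio.run(_demo())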