import os
import asyncio


# Module-level provider state, populated by setup_llm_fallback()
_llm_config = {
    'provider': None,
    'model': None
}

# Next provider to try after each one fails: (provider, env var, model)
_FALLBACKS = {
    'openai': ('groq', 'GROQ_API_KEY', 'llama-3.3-70b-versatile'),
    'groq': ('hyperbolic', 'HYPERBOLIC_API_KEY', 'meta-llama/Llama-3.3-70B-Instruct'),
    'hyperbolic': ('huggingface', 'HF_TOKEN', 'mistralai/Mixtral-8x7B-Instruct-v0.1'),
}


def setup_llm_fallback():
    """Set up the LLM provider, preferring OpenAI, then Groq, Hyperbolic, and Hugging Face."""
    # Try OpenAI first
    if os.getenv('OPENAI_API_KEY'):
        _llm_config['provider'] = 'openai'
        _llm_config['model'] = 'gpt-4o-mini'
        return
    
    # Fallback to Groq
    if os.getenv('GROQ_API_KEY'):
        _llm_config['provider'] = 'groq'
        _llm_config['model'] = 'llama-3.3-70b-versatile'
        return
    
    # Fallback to Hyperbolic
    if os.getenv('HYPERBOLIC_API_KEY'):
        _llm_config['provider'] = 'hyperbolic'
        _llm_config['model'] = 'meta-llama/Llama-3.3-70B-Instruct'
        return
    
    # Last resort: Hugging Face Inference API
    if os.getenv('HF_TOKEN'):
        _llm_config['provider'] = 'huggingface'
        _llm_config['model'] = 'mistralai/Mixtral-8x7B-Instruct-v0.1'
        return
    
    raise ValueError("No LLM API keys configured. Please set at least one of: OPENAI_API_KEY, GROQ_API_KEY, HYPERBOLIC_API_KEY, HF_TOKEN")


async def get_llm_response(
    prompt: str,
    temperature: float = 0.7,
    max_tokens: int = 2000
) -> str:
    """
    Get LLM response using fallback chain
    
    Args:
        prompt: Input prompt
        temperature: Sampling temperature
        max_tokens: Maximum tokens to generate
        
    Returns:
        LLM response text
    """
    provider = _llm_config.get('provider')
    model = _llm_config.get('model')
    
    if not provider:
        setup_llm_fallback()
        provider = _llm_config.get('provider')
        model = _llm_config.get('model')
    
    try:
        if provider == 'openai':
            return await _call_openai(prompt, model, temperature, max_tokens)
        elif provider == 'groq':
            return await _call_groq(prompt, model, temperature, max_tokens)
        elif provider == 'hyperbolic':
            return await _call_hyperbolic(prompt, model, temperature, max_tokens)
        elif provider == 'huggingface':
            return await _call_huggingface(prompt, model, temperature, max_tokens)
        else:
            raise ValueError(f"Unknown LLM provider: {provider}")
    except Exception as e:
        print(f"Error with {provider}: {e}")
        # Walk the chain to the next provider whose API key is configured,
        # switching the model along with the provider, then retry
        nxt = _FALLBACKS.get(provider)
        while nxt and not os.getenv(nxt[1]):
            nxt = _FALLBACKS.get(nxt[0])
        if nxt:
            _llm_config['provider'], _llm_config['model'] = nxt[0], nxt[2]
            return await get_llm_response(prompt, temperature, max_tokens)
        raise


async def _call_openai(prompt: str, model: str, temperature: float, max_tokens: int) -> str:
    """Call OpenAI API"""
    from openai import AsyncOpenAI
    
    client = AsyncOpenAI(api_key=os.getenv('OPENAI_API_KEY'))
    
    response = await client.chat.completions.create(
        model=model,
        messages=[{'role': 'user', 'content': prompt}],
        temperature=temperature,
        max_tokens=max_tokens
    )
    
    return response.choices[0].message.content


async def _call_groq(prompt: str, model: str, temperature: float, max_tokens: int) -> str:
    """Call Groq API"""
    from groq import AsyncGroq
    
    client = AsyncGroq(api_key=os.getenv('GROQ_API_KEY'))
    
    response = await client.chat.completions.create(
        model=model,
        messages=[{'role': 'user', 'content': prompt}],
        temperature=temperature,
        max_tokens=max_tokens
    )
    
    return response.choices[0].message.content


async def _call_hyperbolic(prompt: str, model: str, temperature: float, max_tokens: int) -> str:
    """Call Hyperbolic API"""
    import aiohttp
    
    url = "https://api.hyperbolic.xyz/v1/chat/completions"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {os.getenv('HYPERBOLIC_API_KEY')}"
    }
    
    data = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "temperature": temperature,
        "max_tokens": max_tokens
    }
    
    async with aiohttp.ClientSession() as session:
        async with session.post(url, headers=headers, json=data) as response:
            # Surface HTTP errors so the fallback chain can react to them
            response.raise_for_status()
            result = await response.json()
            return result['choices'][0]['message']['content']


async def _call_huggingface(prompt: str, model: str, temperature: float, max_tokens: int) -> str:
    """Call Hugging Face Inference API"""
    import aiohttp
    
    url = f"https://api-inference.huggingface.co/models/{model}"
    headers = {"Authorization": f"Bearer {os.getenv('HF_TOKEN')}"}
    
    data = {
        "inputs": prompt,
        "parameters": {
            "temperature": temperature,
            "max_new_tokens": max_tokens,
            "return_full_text": False
        }
    }
    
    async with aiohttp.ClientSession() as session:
        async with session.post(url, headers=headers, json=data) as response:
            # Surface HTTP errors so the fallback chain can react to them
            response.raise_for_status()
            result = await response.json()
            # Text-generation models return a list of generations
            if isinstance(result, list) and len(result) > 0:
                return result[0].get('generated_text', '')
            return str(result)
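

# Minimal usage sketch, not part of the module proper: assumes at least one of
# the API keys above is set in the environment. The prompt and token limit are
# illustrative placeholders.
if __name__ == "__main__":
    async def _demo():
        # Pick the first configured provider, then issue a single request
        setup_llm_fallback()
        print(f"Using provider: {_llm_config['provider']} ({_llm_config['model']})")
        answer = await get_llm_response("Say hello in one short sentence.", max_tokens=50)
        print(answer)

    asyncio.run(_demo())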