# Scraped file-viewer header (not part of the program): Space status "Sleeping";
# file size 7,849 bytes; revision 9679fcd. The viewer's line-number gutter (1-239) was dropped.
"""
Groq API Integration for Ultra-Fast LLM Inference
Supports Llama and Mixtral models with streaming
"""
import os
from typing import List, Dict, Optional, Generator
from groq import Groq
import json
class GroqLLM:
    """Groq API client for fast LLM inference.

    Wraps the Groq chat-completions endpoint and offers blocking generation,
    streaming generation, citation-grounded question answering and short
    community summaries.
    """

    # System prompt used by generate_with_citations().
    _CITATION_SYSTEM_PROMPT = (
        "You are an expert on Ireland with deep knowledge of Irish history, culture, geography, and current affairs.\n"
        "Your task is to answer questions about Ireland accurately and comprehensively using the provided context.\n"
        "IMPORTANT INSTRUCTIONS:\n"
        "1. Base your answer ONLY on the provided context\n"
        "2. Use inline citations like [1], [2] to reference sources\n"
        "3. If the context doesn't contain enough information, say so clearly\n"
        "4. Be specific and factual\n"
        "5. Organize complex answers with clear structure\n"
        "6. For historical facts, include relevant dates and details"
    )

    # System prompt used by generate_community_summary().
    _SUMMARY_SYSTEM_PROMPT = (
        "You are an expert at analyzing and summarizing Irish historical and cultural content."
    )

    def __init__(
        self,
        api_key: Optional[str] = None,
        model: str = "llama-3.3-70b-versatile",  # or "mixtral-8x7b-32768"
        temperature: float = 0.1,
        max_tokens: int = 1024
    ):
        """
        Initialize Groq LLM client.

        Models listed by the original author (context sizes as originally
        documented; NOTE(review): verify against Groq's current model list):
        - llama-3.3-70b-versatile (best accuracy, 8k context)
        - llama-3.1-70b-versatile (good accuracy, 128k context)
        - mixtral-8x7b-32768 (fast, good reasoning, 32k context)
        - llama-3.1-8b-instant (fastest, 128k context)

        Args:
            api_key: Groq API key; falls back to the GROQ_API_KEY
                environment variable when omitted or empty.
            model: Model identifier sent with every request.
            temperature: Default sampling temperature.
            max_tokens: Default completion-length limit.

        Raises:
            ValueError: If no API key is passed and GROQ_API_KEY is unset.
        """
        self.api_key = api_key or os.getenv("GROQ_API_KEY")
        if not self.api_key:
            raise ValueError(
                "Groq API key required. Set GROQ_API_KEY environment variable or pass api_key parameter.\n"
                "Get your free API key at: https://console.groq.com/"
            )
        self.client = Groq(api_key=self.api_key)
        self.model = model
        self.temperature = temperature
        self.max_tokens = max_tokens
        print(f"[INFO] Groq LLM initialized with model: {self.model}")

    @staticmethod
    def _build_messages(prompt: str, system_prompt: Optional[str]) -> List[Dict]:
        """Return the chat message list: optional system turn, then the user turn."""
        messages = []
        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})
        messages.append({"role": "user", "content": prompt})
        return messages

    def _request_kwargs(
        self,
        prompt: str,
        system_prompt: Optional[str],
        temperature: Optional[float],
        max_tokens: Optional[int]
    ) -> Dict:
        """Return the keyword arguments shared by blocking and streaming requests."""
        return {
            "model": self.model,
            "messages": self._build_messages(prompt, system_prompt),
            # `is None` rather than `or`: an explicit temperature of 0.0 is a
            # legitimate request and must not be replaced by the default.
            "temperature": self.temperature if temperature is None else temperature,
            # 0 is not a usable token limit, so any falsy value falls back.
            "max_tokens": max_tokens or self.max_tokens,
            "top_p": 1,
        }

    def generate(
        self,
        prompt: str,
        system_prompt: Optional[str] = None,
        temperature: Optional[float] = None,
        max_tokens: Optional[int] = None
    ) -> str:
        """Generate response from Groq API.

        Args:
            prompt: User message.
            system_prompt: Optional system message placed before the prompt.
            temperature: Per-call override; None uses the instance default.
            max_tokens: Per-call override; None/0 uses the instance default.

        Returns:
            The completion text ("" when the API returns no content). Errors
            are not raised: they are printed and returned as a string that
            starts with "Error generating response: ".
        """
        request = self._request_kwargs(prompt, system_prompt, temperature, max_tokens)
        try:
            response = self.client.chat.completions.create(**request, stream=False)
            # Content may be None; keep the declared `str` return type.
            return response.choices[0].message.content or ""
        except Exception as e:
            print(f"[ERROR] Groq API error: {e}")
            return f"Error generating response: {str(e)}"

    def generate_stream(
        self,
        prompt: str,
        system_prompt: Optional[str] = None,
        temperature: Optional[float] = None,
        max_tokens: Optional[int] = None
    ) -> Generator[str, None, None]:
        """Generate streaming response from Groq API.

        Takes the same arguments as generate() and yields the non-empty text
        fragments as they arrive. On failure the error is printed and a single
        "Error generating response: ..." string is yielded instead of raising.
        """
        request = self._request_kwargs(prompt, system_prompt, temperature, max_tokens)
        try:
            stream = self.client.chat.completions.create(**request, stream=True)
            for chunk in stream:
                # A chunk may carry no choices; indexing [0] would raise
                # IndexError and abort an otherwise healthy stream.
                if not chunk.choices:
                    continue
                piece = chunk.choices[0].delta.content
                if piece:
                    yield piece
        except Exception as e:
            print(f"[ERROR] Groq API streaming error: {e}")
            yield f"Error generating response: {str(e)}"

    def generate_with_citations(
        self,
        question: str,
        contexts: List[Dict],
        max_contexts: int = 5
    ) -> Dict:
        """
        Generate answer with proper citations from retrieved contexts.

        Args:
            question: User question
            contexts: List of retrieval results; each must have a 'text' key
                and may have 'source_title', 'source_url', 'combined_score'
            max_contexts: Maximum number of contexts to use

        Returns:
            Dict with 'answer', 'citations' and 'num_contexts_used'

        Raises:
            KeyError: If a used context has no 'text' key.
        """
        # Treat None like an empty retrieval result instead of crashing on the slice.
        selected = (contexts or [])[:max_contexts]

        # Number the contexts from 1 so the ids match the inline [n] citations.
        context_texts = [f"[{i}] {ctx['text']}" for i, ctx in enumerate(selected, 1)]
        citations = [
            {
                "id": i,
                "source": ctx.get('source_title', 'Unknown'),
                "url": ctx.get('source_url', ''),
                "relevance_score": ctx.get('combined_score', 0.0)
            }
            for i, ctx in enumerate(selected, 1)
        ]
        combined_context = "\n\n".join(context_texts)

        user_prompt = (
            "Context from Wikipedia articles about Ireland:\n"
            f"{combined_context}\n"
            f"Question: {question}\n"
            "Please provide a comprehensive answer using the context above. Include inline citations [1], [2], etc. to reference your sources."
        )

        answer = self.generate(
            prompt=user_prompt,
            system_prompt=self._CITATION_SYSTEM_PROMPT,
            temperature=0.1,  # Low temperature for factual accuracy
            max_tokens=1024
        )
        return {
            "answer": answer,
            "citations": citations,
            "num_contexts_used": len(context_texts)
        }

    def generate_community_summary(self, community_data: Dict) -> str:
        """Generate natural language summary for a community.

        Reads up to 10 entries of ``top_entities`` (dicts with an 'entity'
        key), up to 5 ``sources`` and the first 500 characters of
        ``combined_text_sample`` from ``community_data``. Missing or None
        values are treated as empty.
        """
        # `or` guards against keys that are present but explicitly None.
        top_entities = [
            str(e["entity"]) for e in (community_data.get("top_entities") or [])[:10]
        ]
        sources = [str(s) for s in (community_data.get("sources") or [])[:5]]
        text_sample = community_data.get("combined_text_sample") or ""

        entity_list = ", ".join(top_entities)
        source_list = ", ".join(sources)
        prompt = (
            "Analyze this cluster of related Wikipedia content about Ireland and generate a concise summary (2-3 sentences).\n"
            f"Key Topics/Entities: {entity_list}\n"
            f"Main Wikipedia Articles: {source_list}\n"
            f"Sample Text: {text_sample[:500]}\n"
            "Generate a brief summary describing what this content cluster is about:"
        )
        return self.generate(
            prompt=prompt,
            system_prompt=self._SUMMARY_SYSTEM_PROMPT,
            temperature=0.3,
            max_tokens=150
        )
if __name__ == "__main__":
    # Manual smoke test; constructing the client requires a Groq API key.
    client = GroqLLM()

    # 1) Plain question/answer round trip.
    reply = client.generate(
        system_prompt="You are an expert on Ireland. Answer briefly and accurately.",
        prompt="What is the capital of Ireland?",
    )
    print("Response:", reply)

    # 2) Citation-grounded answer over two hand-written contexts.
    dublin_context = {
        "text": "Dublin is the capital and largest city of Ireland. It is located on the east coast.",
        "source_title": "Dublin",
        "source_url": "https://en.wikipedia.org/wiki/Dublin",
        "combined_score": 0.95,
    }
    ireland_context = {
        "text": "Ireland's capital city has been Dublin since medieval times.",
        "source_title": "Ireland",
        "source_url": "https://en.wikipedia.org/wiki/Ireland",
        "combined_score": 0.87,
    }
    cited = client.generate_with_citations(
        contexts=[dublin_context, ireland_context],
        question="What is the capital of Ireland?",
    )

    print("\nAnswer with citations:")
    print(cited["answer"])
    print("\nCitations:")
    for entry in cited["citations"]:
        print(f"[{entry['id']}] {entry['source']}")
|