KUNAL SHAW committed · Commit 123d70b
1 Parent(s): add1aec
Fix Groq API model name and improve error handling
app.py CHANGED
@@ -359,21 +359,40 @@ def search_similar_questions(question: str) -> list:
 
 # Step 3 - Custom LLM
 from openai import OpenAI
-
+
+# Get model name from environment or use Groq's mixtral model
+LLM_MODEL = os.environ.get("LLM_MODEL", "mixtral-8x7b-32768")
+
+def generate_stream(prompt, model=None):
     # Use environment variables for flexibility (OpenAI, Groq, or Custom HF Endpoint)
-    base_url = os.environ.get("LLM_BASE_URL", "https://api.
-    api_key = os.environ.get("LLM_API_KEY"
+    base_url = os.environ.get("LLM_BASE_URL", "https://api.groq.com/openai/v1")
+    api_key = os.environ.get("LLM_API_KEY")
+
+    if not api_key:
+        print("ERROR: LLM_API_KEY not set!")
+        return None
+
+    if model is None:
+        model = LLM_MODEL
+
+    print(f"Using LLM: {model} at {base_url}")
 
     client = OpenAI(base_url=base_url, api_key=api_key)
     response = client.chat.completions.create(
         model=model,
         messages=[
+            {
+                "role": "system",
+                "content": "You are VitalSync AI, a helpful medical assistant. Provide concise, accurate health information."
+            },
             {
                 "role": "user",
-                "content":
+                "content": prompt,
             }
         ],
         stream=True,
+        temperature=0.7,
+        max_tokens=512,
     )
     return response
 # Zephyr formatter
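The hunk above reads three environment variables (LLM_BASE_URL, LLM_MODEL, LLM_API_KEY) and returns the client's streaming response object, or None when the key is missing. A minimal sketch of how a caller might configure and consume it is shown below; the wrapper name stream_answer and the placeholder values are illustrative and not part of this commit, and the chunk shape assumed is the OpenAI v1 streaming format (chunk.choices[0].delta.content).

import os

# Illustrative configuration: in a Space these would be set as secrets/variables,
# never hard-coded in app.py. The key is deliberately left unset here.
os.environ.setdefault("LLM_BASE_URL", "https://api.groq.com/openai/v1")
os.environ.setdefault("LLM_MODEL", "mixtral-8x7b-32768")
# os.environ["LLM_API_KEY"] = "..."  # provide outside the code

def stream_answer(prompt: str):
    """Hypothetical caller: yields the growing reply as chunks arrive."""
    response = generate_stream(prompt)
    if response is None:                       # LLM_API_KEY was not set
        yield "The AI service is not configured."
        return
    partial = ""
    for chunk in response:                     # each chunk carries an incremental delta
        partial += chunk.choices[0].delta.content or ""
        yield partial                          # suited to a Gradio streaming textbox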
@@ -474,14 +493,17 @@ def custom_llm(
         #print(output)
         #yield output
     except Exception as e:
-
-
-
-
+        error_msg = str(e)
+        print(f"LLM ERROR: {error_msg}")
+
+        if "Too Many Requests" in error_msg or "rate_limit" in error_msg.lower():
+            output = "I'm receiving too many requests right now. Please try again in a moment."
+        elif "authentication" in error_msg.lower() or "api_key" in error_msg.lower() or "401" in error_msg:
+            output = "There's an authentication issue with the AI service. Please check the API configuration."
+        elif "model" in error_msg.lower() and "not found" in error_msg.lower():
+            output = f"The AI model is not available. Error: {error_msg}"
         else:
-
-            #gr.Warning("Unfortunately Mistral is unable to process")
-            output = "I do not know what happened but I could not understand you ."
+            output = f"I encountered an error while processing your request. Technical details: {error_msg[:200]}"
 
     return output
 
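Because the except branch now returns a user-facing string instead of letting the exception propagate, the Gradio side can show failures as ordinary replies. A rough sketch, assuming custom_llm takes the user message as its first argument (the callback name respond and the wiring are illustrative; the actual Gradio setup is not part of this diff):

def respond(message, history):
    # Hypothetical Gradio callback: on rate limits, auth problems, or a missing model,
    # custom_llm now returns one of the friendly messages above rather than raising,
    # so the chat history records it like any other answer.
    reply = custom_llm(message)
    return history + [(message, reply)]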