KUNAL SHAW committed
Commit 123d70b · 1 Parent(s): add1aec

Fix Groq API model name and improve error handling

Files changed (1)
  1. app.py +33 -11
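
The commit points the OpenAI-compatible client at Groq's endpoint and fixes the default model name to Groq's identifier; the diff below reads LLM_BASE_URL, LLM_API_KEY, and LLM_MODEL from the environment. A minimal local-testing sketch of that configuration (variable names are taken from the diff; the key value is a placeholder, and in a hosted deployment these would normally be set as environment secrets rather than in code):

# Configuration sketch (not part of the commit); never commit a real key.
import os

os.environ["LLM_BASE_URL"] = "https://api.groq.com/openai/v1"  # Groq's OpenAI-compatible endpoint
os.environ["LLM_API_KEY"] = "<your-groq-api-key>"              # placeholder
os.environ["LLM_MODEL"] = "mixtral-8x7b-32768"                 # Groq model id used as the new default
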
app.py CHANGED
@@ -359,21 +359,40 @@ def search_similar_questions(question: str) -> list:
 
 # Step 3 - Custom LLM
 from openai import OpenAI
-def generate_stream(prompt, model="mixtral-8x7b"):
+
+# Get model name from environment or use Groq's mixtral model
+LLM_MODEL = os.environ.get("LLM_MODEL", "mixtral-8x7b-32768")
+
+def generate_stream(prompt, model=None):
     # Use environment variables for flexibility (OpenAI, Groq, or Custom HF Endpoint)
-    base_url = os.environ.get("LLM_BASE_URL", "https://api.openai.com/v1")
-    api_key = os.environ.get("LLM_API_KEY", "sk-xxxxx")
+    base_url = os.environ.get("LLM_BASE_URL", "https://api.groq.com/openai/v1")
+    api_key = os.environ.get("LLM_API_KEY")
+
+    if not api_key:
+        print("ERROR: LLM_API_KEY not set!")
+        return None
+
+    if model is None:
+        model = LLM_MODEL
+
+    print(f"Using LLM: {model} at {base_url}")
 
     client = OpenAI(base_url=base_url, api_key=api_key)
     response = client.chat.completions.create(
         model=model,
         messages=[
+            {
+                "role": "system",
+                "content": "You are VitalSync AI, a helpful medical assistant. Provide concise, accurate health information."
+            },
             {
                 "role": "user",
-                "content": "{}".format(prompt),
+                "content": prompt,
             }
         ],
         stream=True,
+        temperature=0.7,
+        max_tokens=512,
     )
     return response
 # Zephyr formatter
@@ -474,14 +493,17 @@ def custom_llm(
         #print(output)
         #yield output
     except Exception as e:
-        if "Too Many Requests" in str(e):
-            print("ERROR: Too many requests on mistral client")
-            #gr.Warning("Unfortunately Mistral is unable to process")
-            output = "Unfortunately I am not able to process your request now !"
+        error_msg = str(e)
+        print(f"LLM ERROR: {error_msg}")
+
+        if "Too Many Requests" in error_msg or "rate_limit" in error_msg.lower():
+            output = "I'm receiving too many requests right now. Please try again in a moment."
+        elif "authentication" in error_msg.lower() or "api_key" in error_msg.lower() or "401" in error_msg:
+            output = "There's an authentication issue with the AI service. Please check the API configuration."
+        elif "model" in error_msg.lower() and "not found" in error_msg.lower():
+            output = f"The AI model is not available. Error: {error_msg}"
         else:
-            print("Unhandled Exception: ", str(e))
-            #gr.Warning("Unfortunately Mistral is unable to process")
-            output = "I do not know what happened but I could not understand you ."
+            output = f"I encountered an error while processing your request. Technical details: {error_msg[:200]}"
 
     return output
 
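
For context, generate_stream now returns either None (when LLM_API_KEY is unset) or an OpenAI-style streaming response. A minimal sketch of how a caller might drain that stream, assuming the standard chunk/delta shape of the openai v1 client; the prompt and printing loop are illustrative and not taken from app.py:

# Illustrative consumer (not part of the commit): accumulate the stream into a string.
stream = generate_stream("What are common symptoms of dehydration?")  # hypothetical prompt

output = ""
if stream is None:
    # generate_stream prints an error and returns None when LLM_API_KEY is missing
    output = "LLM is not configured."
else:
    for chunk in stream:
        # Each streamed chunk carries an incremental delta; content may be None
        # on role-only or finish chunks, so guard before appending.
        delta = chunk.choices[0].delta.content
        if delta:
            output += delta

print(output)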