KUNAL SHAW committed · Commit 123d70b
1 Parent(s): add1aec
Fix Groq API model name and improve error handling
app.py CHANGED
@@ -359,21 +359,40 @@ def search_similar_questions(question: str) -> list:
 
 # Step 3 - Custom LLM
 from openai import OpenAI
-
+
+# Get model name from environment or use Groq's mixtral model
+LLM_MODEL = os.environ.get("LLM_MODEL", "mixtral-8x7b-32768")
+
+def generate_stream(prompt, model=None):
     # Use environment variables for flexibility (OpenAI, Groq, or Custom HF Endpoint)
-    base_url = os.environ.get("LLM_BASE_URL", "https://api.
-    api_key = os.environ.get("LLM_API_KEY"
+    base_url = os.environ.get("LLM_BASE_URL", "https://api.groq.com/openai/v1")
+    api_key = os.environ.get("LLM_API_KEY")
+
+    if not api_key:
+        print("ERROR: LLM_API_KEY not set!")
+        return None
+
+    if model is None:
+        model = LLM_MODEL
+
+    print(f"Using LLM: {model} at {base_url}")
 
     client = OpenAI(base_url=base_url, api_key=api_key)
     response = client.chat.completions.create(
         model=model,
         messages=[
+            {
+                "role": "system",
+                "content": "You are VitalSync AI, a helpful medical assistant. Provide concise, accurate health information."
+            },
             {
                 "role": "user",
-                "content":
+                "content": prompt,
             }
         ],
         stream=True,
+        temperature=0.7,
+        max_tokens=512,
     )
     return response
 # Zephyr formatter
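The hunk above reads three environment variables (LLM_BASE_URL, LLM_MODEL, LLM_API_KEY) and returns the client's streaming response object, or None when the key is missing. A minimal sketch of how a caller might configure and consume it is shown below; the wrapper name stream_answer and the placeholder values are illustrative and not part of this commit, and the chunk shape assumed is the OpenAI v1 streaming format (chunk.choices[0].delta.content).

import os

# Illustrative configuration: in a Space these would be set as secrets/variables,
# never hard-coded in app.py. The key is deliberately left unset here.
os.environ.setdefault("LLM_BASE_URL", "https://api.groq.com/openai/v1")
os.environ.setdefault("LLM_MODEL", "mixtral-8x7b-32768")
# os.environ["LLM_API_KEY"] = "..."  # provide outside the code

def stream_answer(prompt: str):
    """Hypothetical caller: yields the growing reply as chunks arrive."""
    response = generate_stream(prompt)
    if response is None:                       # LLM_API_KEY was not set
        yield "The AI service is not configured."
        return
    partial = ""
    for chunk in response:                     # each chunk carries an incremental delta
        partial += chunk.choices[0].delta.content or ""
        yield partial                          # suited to a Gradio streaming textbox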
@@ -474,14 +493,17 @@ def custom_llm(
         #print(output)
         #yield output
     except Exception as e:
-
-
-
-
+        error_msg = str(e)
+        print(f"LLM ERROR: {error_msg}")
+
+        if "Too Many Requests" in error_msg or "rate_limit" in error_msg.lower():
+            output = "I'm receiving too many requests right now. Please try again in a moment."
+        elif "authentication" in error_msg.lower() or "api_key" in error_msg.lower() or "401" in error_msg:
+            output = "There's an authentication issue with the AI service. Please check the API configuration."
+        elif "model" in error_msg.lower() and "not found" in error_msg.lower():
+            output = f"The AI model is not available. Error: {error_msg}"
         else:
-
-            #gr.Warning("Unfortunately Mistral is unable to process")
-            output = "I do not know what happened but I could not understand you ."
+            output = f"I encountered an error while processing your request. Technical details: {error_msg[:200]}"
 
     return output
 
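Because the except branch now returns a user-facing string instead of letting the exception propagate, the Gradio side can show failures as ordinary replies. A rough sketch, assuming custom_llm takes the user message as its first argument (the callback name respond and the wiring are illustrative; the actual Gradio setup is not part of this diff):

def respond(message, history):
    # Hypothetical Gradio callback: on rate limits, auth problems, or a missing model,
    # custom_llm now returns one of the friendly messages above rather than raising,
    # so the chat history records it like any other answer.
    reply = custom_llm(message)
    return history + [(message, reply)]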