Mandark-droid
committed on
Commit
·
32c36f9
1
Parent(s):
eb3c2b5
perf: Optimize Gemini responses for HF Spaces SSE stability
Browse files
Optimizations to prevent SSE timeout errors:
- Reduce max_output_tokens from 8192 to 4096 (faster responses)
- Add 30-second timeout to Gemini API calls
- Add asyncio.wait_for() wrapper to prevent hanging
- Add timeout error handling with user-friendly message
These changes help mitigate SSE streaming issues on HF Spaces where
long-running tool calls cause ASGI/Starlette transport errors.
- gemini_client.py +18 -6
gemini_client.py
CHANGED
|
@@ -31,13 +31,17 @@ class GeminiClient:
|
|
| 31 |
self.model = genai.GenerativeModel(model_name)
|
| 32 |
|
| 33 |
# Generation config for consistent outputs
|
|
|
|
| 34 |
self.generation_config = {
|
| 35 |
"temperature": 0.7,
|
| 36 |
"top_p": 0.95,
|
| 37 |
"top_k": 40,
|
| 38 |
-
"max_output_tokens": 8192
|
| 39 |
}
|
| 40 |
|
|
|
|
|
|
|
|
|
|
| 41 |
async def analyze_with_context(
|
| 42 |
self,
|
| 43 |
data: Dict[str, Any],
|
|
@@ -116,17 +120,25 @@ Format your response in clear markdown with cost breakdowns and optimization tip
|
|
| 116 |
|
| 117 |
user_prompt += "Provide your analysis:"
|
| 118 |
|
| 119 |
-
# Generate response
|
| 120 |
try:
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 124 |
)
|
| 125 |
|
| 126 |
return response.text
|
| 127 |
|
|
|
|
|
|
|
| 128 |
except Exception as e:
|
| 129 |
-
return f"Error generating analysis
|
| 130 |
|
| 131 |
async def generate_summary(
|
| 132 |
self,
|
|
|
|
| 31 |
self.model = genai.GenerativeModel(model_name)
|
| 32 |
|
| 33 |
# Generation config for consistent outputs
|
| 34 |
+
# Reduced max_output_tokens for faster responses on HF Spaces
|
| 35 |
self.generation_config = {
|
| 36 |
"temperature": 0.7,
|
| 37 |
"top_p": 0.95,
|
| 38 |
"top_k": 40,
|
| 39 |
+
"max_output_tokens": 4096, # Reduced from 8192 for faster responses
|
| 40 |
}
|
| 41 |
|
| 42 |
+
# Request timeout (30 seconds for HF Spaces compatibility)
|
| 43 |
+
self.request_timeout = 30
|
| 44 |
+
|
| 45 |
async def analyze_with_context(
|
| 46 |
self,
|
| 47 |
data: Dict[str, Any],
|
|
|
|
| 120 |
|
| 121 |
user_prompt += "Provide your analysis:"
|
| 122 |
|
| 123 |
+
# Generate response with timeout handling
|
| 124 |
try:
|
| 125 |
+
import asyncio
|
| 126 |
+
|
| 127 |
+
# Add timeout to prevent hanging on HF Spaces
|
| 128 |
+
response = await asyncio.wait_for(
|
| 129 |
+
self.model.generate_content_async(
|
| 130 |
+
user_prompt,
|
| 131 |
+
generation_config=self.generation_config
|
| 132 |
+
),
|
| 133 |
+
timeout=self.request_timeout
|
| 134 |
)
|
| 135 |
|
| 136 |
return response.text
|
| 137 |
|
| 138 |
+
except asyncio.TimeoutError:
|
| 139 |
+
return "⏱️ **Analysis timed out**. The request took too long. Try analyzing a smaller dataset or simplifying the query."
|
| 140 |
except Exception as e:
|
| 141 |
+
return f"❌ **Error generating analysis**: {str(e)}"
|
| 142 |
|
| 143 |
async def generate_summary(
|
| 144 |
self,
|