Mandark-droid
committed on
Commit
·
32c36f9
1
Parent(s):
eb3c2b5
perf: Optimize Gemini responses for HF Spaces SSE stability
Browse files
Optimizations to prevent SSE timeout errors:
- Reduce max_output_tokens from 8192 to 4096 (faster responses)
- Add 30-second timeout to Gemini API calls
- Add asyncio.wait_for() wrapper to prevent hanging
- Add timeout error handling with user-friendly message
These changes help mitigate SSE streaming issues on HF Spaces where
long-running tool calls cause ASGI/Starlette transport errors.
- gemini_client.py +18 -6
gemini_client.py
CHANGED
|
@@ -31,13 +31,17 @@ class GeminiClient:
|
|
| 31 |
self.model = genai.GenerativeModel(model_name)
|
| 32 |
|
| 33 |
# Generation config for consistent outputs
|
|
|
|
| 34 |
self.generation_config = {
|
| 35 |
"temperature": 0.7,
|
| 36 |
"top_p": 0.95,
|
| 37 |
"top_k": 40,
|
| 38 |
-
"max_output_tokens": 8192
|
| 39 |
}
|
| 40 |
|
|
|
|
|
|
|
|
|
|
| 41 |
async def analyze_with_context(
|
| 42 |
self,
|
| 43 |
data: Dict[str, Any],
|
|
@@ -116,17 +120,25 @@ Format your response in clear markdown with cost breakdowns and optimization tip
|
|
| 116 |
|
| 117 |
user_prompt += "Provide your analysis:"
|
| 118 |
|
| 119 |
-
# Generate response
|
| 120 |
try:
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 124 |
)
|
| 125 |
|
| 126 |
return response.text
|
| 127 |
|
|
|
|
|
|
|
| 128 |
except Exception as e:
|
| 129 |
-
return f"Error generating analysis
|
| 130 |
|
| 131 |
async def generate_summary(
|
| 132 |
self,
|
|
|
|
| 31 |
self.model = genai.GenerativeModel(model_name)
|
| 32 |
|
| 33 |
# Generation config for consistent outputs
|
| 34 |
+
# Reduced max_output_tokens for faster responses on HF Spaces
|
| 35 |
self.generation_config = {
|
| 36 |
"temperature": 0.7,
|
| 37 |
"top_p": 0.95,
|
| 38 |
"top_k": 40,
|
| 39 |
+
"max_output_tokens": 4096, # Reduced from 8192 for faster responses
|
| 40 |
}
|
| 41 |
|
| 42 |
+
# Request timeout (30 seconds for HF Spaces compatibility)
|
| 43 |
+
self.request_timeout = 30
|
| 44 |
+
|
| 45 |
async def analyze_with_context(
|
| 46 |
self,
|
| 47 |
data: Dict[str, Any],
|
|
|
|
| 120 |
|
| 121 |
user_prompt += "Provide your analysis:"
|
| 122 |
|
| 123 |
+
# Generate response with timeout handling
|
| 124 |
try:
|
| 125 |
+
import asyncio
|
| 126 |
+
|
| 127 |
+
# Add timeout to prevent hanging on HF Spaces
|
| 128 |
+
response = await asyncio.wait_for(
|
| 129 |
+
self.model.generate_content_async(
|
| 130 |
+
user_prompt,
|
| 131 |
+
generation_config=self.generation_config
|
| 132 |
+
),
|
| 133 |
+
timeout=self.request_timeout
|
| 134 |
)
|
| 135 |
|
| 136 |
return response.text
|
| 137 |
|
| 138 |
+
except asyncio.TimeoutError:
|
| 139 |
+
return "⏱️ **Analysis timed out**. The request took too long. Try analyzing a smaller dataset or simplifying the query."
|
| 140 |
except Exception as e:
|
| 141 |
+
return f"❌ **Error generating analysis**: {str(e)}"
|
| 142 |
|
| 143 |
async def generate_summary(
|
| 144 |
self,
|