Mandark-droid committed on
Commit
32c36f9
·
1 Parent(s): eb3c2b5

perf: Optimize Gemini responses for HF Spaces SSE stability

Browse files

Optimizations to prevent SSE timeout errors:
- Reduce max_output_tokens from 8192 to 4096 (faster responses)
- Add 30-second timeout to Gemini API calls
- Add asyncio.wait_for() wrapper to prevent hanging
- Add timeout error handling with user-friendly message

These changes help mitigate SSE streaming issues on HF Spaces where
long-running tool calls cause ASGI/Starlette transport errors.

Files changed (1) hide show
  1. gemini_client.py +18 -6
gemini_client.py CHANGED
@@ -31,13 +31,17 @@ class GeminiClient:
31
  self.model = genai.GenerativeModel(model_name)
32
 
33
  # Generation config for consistent outputs
 
34
  self.generation_config = {
35
  "temperature": 0.7,
36
  "top_p": 0.95,
37
  "top_k": 40,
38
- "max_output_tokens": 8192,
39
  }
40
 
 
 
 
41
  async def analyze_with_context(
42
  self,
43
  data: Dict[str, Any],
@@ -116,17 +120,25 @@ Format your response in clear markdown with cost breakdowns and optimization tip
116
 
117
  user_prompt += "Provide your analysis:"
118
 
119
- # Generate response
120
  try:
121
- response = await self.model.generate_content_async(
122
- user_prompt,
123
- generation_config=self.generation_config
 
 
 
 
 
 
124
  )
125
 
126
  return response.text
127
 
 
 
128
  except Exception as e:
129
- return f"Error generating analysis: {str(e)}"
130
 
131
  async def generate_summary(
132
  self,
 
31
  self.model = genai.GenerativeModel(model_name)
32
 
33
  # Generation config for consistent outputs
34
+ # Reduced max_output_tokens for faster responses on HF Spaces
35
  self.generation_config = {
36
  "temperature": 0.7,
37
  "top_p": 0.95,
38
  "top_k": 40,
39
+ "max_output_tokens": 4096, # Reduced from 8192 for faster responses
40
  }
41
 
42
+ # Request timeout (30 seconds for HF Spaces compatibility)
43
+ self.request_timeout = 30
44
+
45
  async def analyze_with_context(
46
  self,
47
  data: Dict[str, Any],
 
120
 
121
  user_prompt += "Provide your analysis:"
122
 
123
+ # Generate response with timeout handling
124
  try:
125
+ import asyncio
126
+
127
+ # Add timeout to prevent hanging on HF Spaces
128
+ response = await asyncio.wait_for(
129
+ self.model.generate_content_async(
130
+ user_prompt,
131
+ generation_config=self.generation_config
132
+ ),
133
+ timeout=self.request_timeout
134
  )
135
 
136
  return response.text
137
 
138
+ except asyncio.TimeoutError:
139
+ return "⏱️ **Analysis timed out**. The request took too long. Try analyzing a smaller dataset or simplifying the query."
140
  except Exception as e:
141
+ return f"❌ **Error generating analysis**: {str(e)}"
142
 
143
  async def generate_summary(
144
  self,