Gus9025 committed on
Commit
5e48c39
·
1 Parent(s): d6b879e

history fix

Browse files
Files changed (1) hide show
  1. app.py +24 -2
app.py CHANGED
@@ -172,6 +172,16 @@ def chat_with_llm(messages: List[Dict[str, str]], max_tokens: int = 200, tempera
172
  )
173
 
174
  try:
 
 
 
 
 
 
 
 
 
 
175
  response = hf_client.chat_completion(
176
  messages=chat_messages,
177
  max_tokens=max_tokens,
@@ -200,14 +210,26 @@ def chat_with_llm(messages: List[Dict[str, str]], max_tokens: int = 200, tempera
200
  # Otherwise convert to string
201
  return str(response).strip()
202
  except Exception as chat_error:
203
- # Chat completion failed - check if model only supports conversational
204
  error_str = str(chat_error).lower()
 
 
 
 
 
 
 
 
 
 
 
 
205
 
206
  # Never try text_generation for conversational-only models (Mistral, etc.)
207
  if model_supports_only_conversational:
208
  # Model only supports conversational/chat completion, don't try text generation
209
  print(f"Chat completion failed for conversational-only model ({HF_MODEL}): {chat_error}")
210
- raise chat_error
211
 
212
  # Also check error message for indicators that model only supports conversational
213
  if "conversational" in error_str or ("text-generation" in error_str and "not supported" in error_str):
 
172
  )
173
 
174
  try:
175
+ # Limit conversation history to prevent context length issues
176
+ # Keep system message + last 10 message pairs (20 messages max)
177
+ if len(chat_messages) > 21: # 1 system + 20 conversation messages
178
+ # Keep system message and truncate to last 20 messages
179
+ system_msg = chat_messages[0] if chat_messages[0].get("role") == "system" else None
180
+ recent_messages = chat_messages[-20:] if not system_msg else [system_msg] + chat_messages[-20:]
181
+ chat_messages = recent_messages
182
+ print(f"DEBUG: Truncated conversation history from {len(chat_messages) + 1} to {len(chat_messages)} messages")
183
+
184
+ print(f"DEBUG: Sending {len(chat_messages)} messages to Hugging Face API")
185
  response = hf_client.chat_completion(
186
  messages=chat_messages,
187
  max_tokens=max_tokens,
 
210
  # Otherwise convert to string
211
  return str(response).strip()
212
  except Exception as chat_error:
213
+ # Chat completion failed - log the full error for debugging
214
  error_str = str(chat_error).lower()
215
+ error_full = str(chat_error)
216
+ print(f"ERROR: Hugging Face chat_completion failed: {error_full}")
217
+ print(f"ERROR: Error type: {type(chat_error)}")
218
+ print(f"ERROR: Number of messages sent: {len(chat_messages)}")
219
+
220
+ # Check for specific error types
221
+ if "rate limit" in error_str or "429" in error_str or "quota" in error_str:
222
+ raise Exception(f"Rate limit exceeded. Please wait a moment and try again. Error: {error_full}")
223
+ elif "timeout" in error_str or "timed out" in error_str:
224
+ raise Exception(f"Request timed out. The conversation might be too long. Please try a shorter message. Error: {error_full}")
225
+ elif "context length" in error_str or "token" in error_str and "limit" in error_str:
226
+ raise Exception(f"Conversation too long. Please start a new conversation. Error: {error_full}")
227
 
228
  # Never try text_generation for conversational-only models (Mistral, etc.)
229
  if model_supports_only_conversational:
230
  # Model only supports conversational/chat completion, don't try text generation
231
  print(f"Chat completion failed for conversational-only model ({HF_MODEL}): {chat_error}")
232
+ raise Exception(f"Hugging Face API error with {HF_MODEL}: {error_full}")
233
 
234
  # Also check error message for indicators that model only supports conversational
235
  if "conversational" in error_str or ("text-generation" in error_str and "not supported" in error_str):