history fix
app.py CHANGED
@@ -172,6 +172,16 @@ def chat_with_llm(messages: List[Dict[str, str]], max_tokens: int = 200, tempera
         )
 
     try:
+        # Limit conversation history to prevent context length issues
+        # Keep system message + last 10 message pairs (20 messages max)
+        if len(chat_messages) > 21:  # 1 system + 20 conversation messages
+            # Keep system message and truncate to last 20 messages
+            system_msg = chat_messages[0] if chat_messages[0].get("role") == "system" else None
+            recent_messages = chat_messages[-20:] if not system_msg else [system_msg] + chat_messages[-20:]
+            print(f"DEBUG: Truncated conversation history from {len(chat_messages)} to {len(recent_messages)} messages")
+            chat_messages = recent_messages
+
+        print(f"DEBUG: Sending {len(chat_messages)} messages to Hugging Face API")
         response = hf_client.chat_completion(
             messages=chat_messages,
             max_tokens=max_tokens,
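
For reference, the truncation rule the first hunk adds can be exercised on its own. The following is a minimal standalone sketch, not code from app.py; the helper name truncate_history and the dummy history are ours, assuming the same role/content message dicts the app sends:

# Standalone sketch of the truncation rule: keep the system message
# (when present) plus the most recent 20 conversation messages.
def truncate_history(chat_messages, max_pairs=10):
    limit = 2 * max_pairs  # 10 user/assistant pairs -> 20 messages
    if len(chat_messages) <= limit + 1:  # 1 system + 20 conversation messages
        return chat_messages
    system_msg = chat_messages[0] if chat_messages[0].get("role") == "system" else None
    return ([system_msg] if system_msg else []) + chat_messages[-limit:]

# Quick check with dummy messages: 1 system + 30 user turns -> 21 kept.
history = [{"role": "system", "content": "You are helpful."}]
history += [{"role": "user", "content": f"msg {i}"} for i in range(30)]
assert len(truncate_history(history)) == 21
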
@@ -200,14 +210,26 @@ def chat_with_llm(messages: List[Dict[str, str]], max_tokens: int = 200, tempera
         # Otherwise convert to string
         return str(response).strip()
     except Exception as chat_error:
-        # Chat completion failed -
+        # Chat completion failed - log the full error for debugging
         error_str = str(chat_error).lower()
+        error_full = str(chat_error)
+        print(f"ERROR: Hugging Face chat_completion failed: {error_full}")
+        print(f"ERROR: Error type: {type(chat_error)}")
+        print(f"ERROR: Number of messages sent: {len(chat_messages)}")
+
+        # Check for specific error types
+        if "rate limit" in error_str or "429" in error_str or "quota" in error_str:
+            raise Exception(f"Rate limit exceeded. Please wait a moment and try again. Error: {error_full}")
+        elif "timeout" in error_str or "timed out" in error_str:
+            raise Exception(f"Request timed out. The conversation might be too long. Please try a shorter message. Error: {error_full}")
+        elif "context length" in error_str or ("token" in error_str and "limit" in error_str):
+            raise Exception(f"Conversation too long. Please start a new conversation. Error: {error_full}")
 
         # Never try text_generation for conversational-only models (Mistral, etc.)
         if model_supports_only_conversational:
             # Model only supports conversational/chat completion, don't try text generation
             print(f"Chat completion failed for conversational-only model ({HF_MODEL}): {chat_error}")
-            raise
+            raise Exception(f"Hugging Face API error with {HF_MODEL}: {error_full}")
 
         # Also check error message for indicators that model only supports conversational
         if "conversational" in error_str or ("text-generation" in error_str and "not supported" in error_str):
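
For context, hf_client here is presumably a huggingface_hub.InferenceClient, whose chat_completion method accepts the messages/max_tokens arguments seen above. A minimal call of the same shape, assuming a valid token is available; the model id is a placeholder, not necessarily what HF_MODEL is set to:

from huggingface_hub import InferenceClient

# Placeholder model id; substitute whatever HF_MODEL is configured to.
client = InferenceClient(model="mistralai/Mistral-7B-Instruct-v0.2")
response = client.chat_completion(
    messages=[{"role": "user", "content": "Hello"}],
    max_tokens=200,
)
print(response.choices[0].message.content)
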