Spaces:
Sleeping
Sleeping
YanBoChen
committed on
Commit
·
5b7c9f8
1
Parent(s):
e84171b
Add dual task processing method for medical query analysis in llm_Med42_70BClient and update user_prompt to utilize it
Browse files
- src/llm_clients.py +110 -0
- src/user_prompt.py +4 -4
src/llm_clients.py
CHANGED
|
@@ -273,6 +273,116 @@ class llm_Med42_70BClient:
|
|
| 273 |
'latency': latency # Include latency even for error cases
|
| 274 |
}
|
| 275 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 276 |
def extract_medical_keywords_for_customization(
|
| 277 |
self,
|
| 278 |
query: str,
|
|
|
|
| 273 |
'latency': latency # Include latency even for error cases
|
| 274 |
}
|
| 275 |
|
| 276 |
+
def analyze_medical_query_dual_task(
|
| 277 |
+
self,
|
| 278 |
+
user_query: str,
|
| 279 |
+
max_tokens: int = 100,
|
| 280 |
+
timeout: Optional[float] = None
|
| 281 |
+
) -> Dict[str, Union[str, float]]:
|
| 282 |
+
"""
|
| 283 |
+
Analyze medical query with dual task processing (Level 2+4 Combined).
|
| 284 |
+
|
| 285 |
+
Performs both condition extraction and medical query validation in single LLM call.
|
| 286 |
+
Specifically designed for user_prompt.py Level 2+4 combined processing.
|
| 287 |
+
|
| 288 |
+
Args:
|
| 289 |
+
user_query: Original user medical query (not wrapped prompt)
|
| 290 |
+
max_tokens: Maximum tokens to generate
|
| 291 |
+
timeout: Specific API call timeout
|
| 292 |
+
|
| 293 |
+
Returns:
|
| 294 |
+
Dict containing dual task results with structured format
|
| 295 |
+
"""
|
| 296 |
+
import time
|
| 297 |
+
|
| 298 |
+
# Start timing
|
| 299 |
+
start_time = time.time()
|
| 300 |
+
|
| 301 |
+
try:
|
| 302 |
+
self.logger.info(f"Calling Medical LLM (Dual Task) with query: {user_query}")
|
| 303 |
+
|
| 304 |
+
# Prepare chat completion request with dual task system prompt
|
| 305 |
+
response = self.client.chat.completions.create(
|
| 306 |
+
model="m42-health/Llama3-Med42-70B",
|
| 307 |
+
messages=[
|
| 308 |
+
{
|
| 309 |
+
"role": "system",
|
| 310 |
+
"content": """Medical Query Analysis - Dual Task Processing:
|
| 311 |
+
|
| 312 |
+
1. Extract primary medical condition (if specific condition identifiable)
|
| 313 |
+
2. Determine if this is a medical-related query
|
| 314 |
+
|
| 315 |
+
RESPONSE FORMAT:
|
| 316 |
+
MEDICAL: YES/NO
|
| 317 |
+
CONDITION: [specific condition name or "NONE"]
|
| 318 |
+
CONFIDENCE: [0.1-1.0]
|
| 319 |
+
|
| 320 |
+
EXAMPLES:
|
| 321 |
+
- "chest pain and shortness of breath" → MEDICAL: YES, CONDITION: Acute Coronary Syndrome, CONFIDENCE: 0.9
|
| 322 |
+
- "how to cook pasta safely" → MEDICAL: NO, CONDITION: NONE, CONFIDENCE: 0.95
|
| 323 |
+
- "persistent headache treatment options" → MEDICAL: YES, CONDITION: Headache Disorder, CONFIDENCE: 0.8
|
| 324 |
+
- "feeling unwell lately" → MEDICAL: YES, CONDITION: NONE, CONFIDENCE: 0.6
|
| 325 |
+
|
| 326 |
+
Return ONLY the specified format."""
|
| 327 |
+
},
|
| 328 |
+
{
|
| 329 |
+
"role": "user",
|
| 330 |
+
"content": user_query
|
| 331 |
+
}
|
| 332 |
+
],
|
| 333 |
+
max_tokens=max_tokens,
|
| 334 |
+
temperature=0 # Ensure deterministic responses
|
| 335 |
+
)
|
| 336 |
+
|
| 337 |
+
# Calculate latency
|
| 338 |
+
end_time = time.time()
|
| 339 |
+
latency = end_time - start_time
|
| 340 |
+
|
| 341 |
+
# Extract the response text
|
| 342 |
+
response_text = response.choices[0].message.content or ""
|
| 343 |
+
|
| 344 |
+
# Log raw response and latency
|
| 345 |
+
self.logger.info(f"Raw LLM Dual Task Response: {response_text}")
|
| 346 |
+
self.logger.info(f"Dual Task Query Latency: {latency:.4f} seconds")
|
| 347 |
+
|
| 348 |
+
# Detect abnormal response
|
| 349 |
+
if self._is_abnormal_response(response_text):
|
| 350 |
+
self.logger.error(f"❌ Abnormal LLM dual task response detected: {response_text[:50]}...")
|
| 351 |
+
return {
|
| 352 |
+
'extracted_condition': '',
|
| 353 |
+
'confidence': '0',
|
| 354 |
+
'error': 'Abnormal LLM dual task response detected',
|
| 355 |
+
'raw_response': response_text,
|
| 356 |
+
'latency': latency
|
| 357 |
+
}
|
| 358 |
+
|
| 359 |
+
# Return structured response for Level 2+4 processing
|
| 360 |
+
return {
|
| 361 |
+
'extracted_condition': response_text, # For compatibility with existing logging
|
| 362 |
+
'confidence': '0.8', # Default confidence for successful dual task
|
| 363 |
+
'raw_response': response_text, # Contains MEDICAL/CONDITION/CONFIDENCE format
|
| 364 |
+
'latency': latency,
|
| 365 |
+
'dual_task_mode': True # Flag to indicate dual task processing
|
| 366 |
+
}
|
| 367 |
+
|
| 368 |
+
except Exception as e:
|
| 369 |
+
# Calculate latency even for failed requests
|
| 370 |
+
end_time = time.time()
|
| 371 |
+
latency = end_time - start_time
|
| 372 |
+
|
| 373 |
+
self.logger.error(f"Medical LLM dual task query error: {str(e)}")
|
| 374 |
+
self.logger.error(f"Error Type: {type(e).__name__}")
|
| 375 |
+
self.logger.error(f"Dual task query that caused error: {user_query}")
|
| 376 |
+
|
| 377 |
+
return {
|
| 378 |
+
'extracted_condition': '',
|
| 379 |
+
'confidence': '0',
|
| 380 |
+
'error': str(e),
|
| 381 |
+
'raw_response': '',
|
| 382 |
+
'latency': latency,
|
| 383 |
+
'dual_task_mode': True
|
| 384 |
+
}
|
| 385 |
+
|
| 386 |
def extract_medical_keywords_for_customization(
|
| 387 |
self,
|
| 388 |
query: str,
|
src/user_prompt.py
CHANGED
|
@@ -249,10 +249,10 @@ Return ONLY the specified format."""
|
|
| 249 |
|
| 250 |
logger.info("🤖 COMBINED L2+4: Single LLM call for extraction + validation")
|
| 251 |
|
| 252 |
-
llama_response = self.llm_client.
|
| 253 |
-
|
| 254 |
-
max_tokens=100,
|
| 255 |
-
timeout=12.0
|
| 256 |
)
|
| 257 |
|
| 258 |
# Get both raw response and extracted condition
|
|
|
|
| 249 |
|
| 250 |
logger.info("🤖 COMBINED L2+4: Single LLM call for extraction + validation")
|
| 251 |
|
| 252 |
+
llama_response = self.llm_client.analyze_medical_query_dual_task(
|
| 253 |
+
user_query=user_query, # Direct original query, not wrapped prompt
|
| 254 |
+
max_tokens=100,
|
| 255 |
+
timeout=12.0
|
| 256 |
)
|
| 257 |
|
| 258 |
# Get both raw response and extracted condition
|