Spaces:
Sleeping
Sleeping
| """ | |
| OnCall.ai LLM Clients Module | |
| Provides specialized LLM clients for medical query processing. | |
| Author: OnCall.ai Team | |
| Date: 2025-07-29 | |
| """ | |
| import logging | |
| import os | |
| from typing import Dict, Optional, Union | |
| from huggingface_hub import InferenceClient | |
| from dotenv import load_dotenv | |
| # Load environment variables from .env file | |
| load_dotenv() | |
| class llm_Med42_70BClient: | |
| def __init__( | |
| self, | |
| model_name: str = "m42-health/Llama3-Med42-70B", | |
| timeout: float = 30.0 | |
| ): | |
| """ | |
| Initialize Medical LLM client for query processing. | |
| Args: | |
| model_name: Hugging Face model name | |
| timeout: API call timeout duration | |
| Warning: This model should not be used for professional medical advice. | |
| """ | |
| self.logger = logging.getLogger(__name__) | |
| self.timeout = timeout | |
| # Configure logging to show detailed information | |
| logging.basicConfig( | |
| level=logging.INFO, | |
| format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' | |
| ) | |
| # Get Hugging Face token from environment | |
| hf_token = os.getenv('HF_TOKEN') | |
| if not hf_token: | |
| self.logger.error("HF_TOKEN is missing from environment variables.") | |
| raise ValueError( | |
| "HF_TOKEN not found in environment variables. " | |
| "Please set HF_TOKEN in your .env file or environment. " | |
| "Ensure the token is not empty and is correctly set." | |
| ) | |
| try: | |
| # Initialize InferenceClient with the new model | |
| self.client = InferenceClient( | |
| provider="featherless-ai", | |
| api_key=hf_token | |
| ) | |
| self.logger.info(f"Medical LLM client initialized with model: {model_name}") | |
| self.logger.warning( | |
| "Medical LLM Model: Research tool only. " | |
| "Not for professional medical diagnosis." | |
| ) | |
| except Exception as e: | |
| self.logger.error(f"Failed to initialize InferenceClient: {str(e)}") | |
| self.logger.error(f"Error Type: {type(e).__name__}") | |
| self.logger.error(f"Detailed Error: {repr(e)}") | |
| raise ValueError(f"Failed to initialize Medical LLM client: {str(e)}") from e | |
| def analyze_medical_query( | |
| self, | |
| query: str, | |
| max_tokens: int = 100, | |
| timeout: Optional[float] = None | |
| ) -> Dict[str, Union[str, float]]: | |
| """ | |
| Analyze medical query and extract condition. | |
| Args: | |
| query: Medical query text | |
| max_tokens: Maximum tokens to generate | |
| timeout: Specific API call timeout | |
| Returns: | |
| Extracted medical condition information with latency | |
| """ | |
| import time | |
| # Start timing | |
| start_time = time.time() | |
| try: | |
| self.logger.info(f"Calling Medical LLM with query: {query}") | |
| # Prepare chat completion request with updated system prompt | |
| response = self.client.chat.completions.create( | |
| model="m42-health/Llama3-Med42-70B", | |
| messages=[ | |
| { | |
| "role": "system", | |
| "content": """You are a medical assistant trained to extract medical conditions. | |
| HANDLING MULTIPLE CONDITIONS: | |
| 1. If query contains multiple medical conditions, extract the PRIMARY/ACUTE condition | |
| 2. Priority order: Life-threatening emergencies > Acute conditions > Chronic diseases > Symptoms | |
| 3. For patient scenarios, focus on the condition requiring immediate medical attention | |
| EXAMPLES: | |
| - Single: "chest pain" → "Acute Coronary Syndrome" | |
| - Multiple: "diabetic patient with chest pain" → "Acute Coronary Syndrome" | |
| - Chronic+Acute: "hypertension patient having seizure" → "Seizure Disorder" | |
| - Complex: "20-year-old female, porphyria, sudden seizure" → "Acute Seizure" | |
| - Emergency context: "porphyria patient with sudden seizure" → "Seizure Disorder" | |
| RESPONSE FORMAT: | |
| - Medical queries: Return ONLY the primary condition name | |
| - Non-medical queries: Return "NON_MEDICAL_QUERY" | |
| DO NOT provide explanations or medical advice.""" | |
| }, | |
| { | |
| "role": "user", | |
| "content": query | |
| } | |
| ], | |
| max_tokens=max_tokens | |
| ) | |
| # Calculate latency | |
| end_time = time.time() | |
| latency = end_time - start_time | |
| # Extract the response text | |
| response_text = response.choices[0].message.content or "" | |
| # Log raw response and latency | |
| self.logger.info(f"Raw LLM Response: {response_text}") | |
| self.logger.info(f"Query Latency: {latency:.4f} seconds") | |
| # Detect abnormal response | |
| if self._is_abnormal_response(response_text): | |
| self.logger.error(f"❌ Abnormal LLM response detected: {response_text[:50]}...") | |
| return { | |
| 'extracted_condition': '', | |
| 'confidence': '0', | |
| 'error': 'Abnormal LLM response detected', | |
| 'raw_response': response_text, | |
| 'latency': latency | |
| } | |
| # Extract condition from response | |
| extracted_condition = self._extract_condition(response_text) | |
| # Log the extracted condition | |
| self.logger.info(f"Extracted Condition: {extracted_condition}") | |
| return { | |
| 'extracted_condition': extracted_condition, | |
| 'confidence': '0.8', | |
| 'raw_response': response_text, | |
| 'latency': latency # Add latency to the return dictionary | |
| } | |
| except Exception as e: | |
| # Calculate latency even for failed requests | |
| end_time = time.time() | |
| latency = end_time - start_time | |
| self.logger.error(f"Medical LLM query error: {str(e)}") | |
| self.logger.error(f"Error Type: {type(e).__name__}") | |
| self.logger.error(f"Detailed Error: {repr(e)}") | |
| self.logger.error(f"Query Latency (on error): {latency:.4f} seconds") | |
| # Additional context logging | |
| self.logger.error(f"Query that caused error: {query}") | |
| return { | |
| 'extracted_condition': '', | |
| 'confidence': '0', | |
| 'error': str(e), | |
| 'latency': latency # Include latency even for error cases | |
| } | |
| def _extract_condition(self, response: str) -> str: | |
| """ | |
| Extract medical condition from model response. | |
| Args: | |
| response: Full model-generated text | |
| Returns: | |
| Extracted medical condition or empty string if non-medical | |
| """ | |
| # Check if this is a rejection response first | |
| if self._is_rejection_response(response): | |
| return "" | |
| from medical_conditions import CONDITION_KEYWORD_MAPPING | |
| # Search in known medical conditions | |
| for condition in CONDITION_KEYWORD_MAPPING.keys(): | |
| if condition.lower() in response.lower(): | |
| return condition | |
| return response.split('\n')[0].strip() or "" | |
| def _is_abnormal_response(self, response: str) -> bool: | |
| """ | |
| Detect abnormal LLM responses (e.g., repetitive characters, short/long responses) | |
| Args: | |
| response: LLM response text | |
| Returns: | |
| bool: True if response is abnormal, False otherwise | |
| """ | |
| if not response or not response.strip(): | |
| return True | |
| response_stripped = response.strip() | |
| # Detect repetitive characters (e.g., !!!!!!!) | |
| if len(response_stripped) > 20: | |
| unique_chars = len(set(response_stripped)) | |
| if unique_chars <= 3: # Only a few characters | |
| self.logger.warning(f"Detected repetitive character pattern: {response_stripped[:30]}...") | |
| return True | |
| # Detect special character patterns | |
| abnormal_patterns = ['!!!!', '????', '****', '####', '----'] | |
| for pattern in abnormal_patterns: | |
| if pattern in response_stripped: | |
| self.logger.warning(f"Detected abnormal pattern '{pattern}' in response") | |
| return True | |
| # Detect short response (less than 2 characters) | |
| if len(response_stripped) < 2: | |
| return True | |
| # Detect long response - allow some flexibility for detailed medical advice | |
| # 750 words ≈ 1000-1200 chars, allow some flexibility to 2500 chars | |
| if len(response_stripped) > 2500: # Changed from 1000 to 2500 | |
| self.logger.warning(f"Response extremely long: {len(response_stripped)} chars") | |
| return True | |
| return False | |
| def _is_rejection_response(self, response: str) -> bool: | |
| """ | |
| Dual-layer detection: prompt compliance + natural language patterns | |
| Args: | |
| response: LLM response text | |
| Returns: | |
| True if response indicates non-medical query rejection | |
| """ | |
| response_upper = response.upper() | |
| response_lower = response.lower() | |
| # Layer 1: Check for standardized format (if LLM follows prompt) | |
| if "NON_MEDICAL_QUERY" in response_upper: | |
| return True | |
| # Layer 2: Check natural language rejection patterns (fallback) | |
| rejection_patterns = [ | |
| "i do not address", | |
| "do not address", | |
| "outside my biomedical scope", | |
| "outside my medical scope", | |
| "unrelated to medical conditions", | |
| "not about a medical condition", | |
| "not a medical condition", | |
| "this query is outside", | |
| "culinary practice", # cooking-related | |
| "technology trends", # programming-related | |
| "meteorology", # weather-related | |
| "non-medical context" | |
| ] | |
| return any(pattern in response_lower for pattern in rejection_patterns) | |
| def main(): | |
| """ | |
| Test Medical LLM client functionality | |
| """ | |
| import time | |
| from datetime import datetime | |
| # Record total execution start time | |
| total_start_time = time.time() | |
| execution_start_timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S") | |
| try: | |
| print(f"Execution Started at: {execution_start_timestamp}") | |
| # Test client initialization | |
| client = llm_Med42_70BClient() | |
| test_queries = [ | |
| "patient experiencing chest pain", | |
| "sudden weakness on one side", | |
| "severe headache with neurological symptoms" | |
| ] | |
| # Store individual query results | |
| query_results = [] | |
| for query in test_queries: | |
| print(f"\nTesting query: {query}") | |
| result = client.analyze_medical_query(query) | |
| # Store query result | |
| query_result = { | |
| 'query': query, | |
| 'extracted_condition': result.get('extracted_condition', 'N/A'), | |
| 'confidence': result.get('confidence', 'N/A'), | |
| 'latency': result.get('latency', 'N/A') | |
| } | |
| query_results.append(query_result) | |
| # Print individual query results | |
| print("Extracted Condition:", query_result['extracted_condition']) | |
| print("Confidence:", query_result['confidence']) | |
| print(f"Latency: {query_result['latency']:.4f} seconds") | |
| if 'error' in result: | |
| print("Error:", result['error']) | |
| print("---") | |
| # Calculate total execution time | |
| total_end_time = time.time() | |
| total_execution_time = total_end_time - total_start_time | |
| execution_end_timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S") | |
| # Print summary | |
| print("\n--- Execution Summary ---") | |
| print(f"Execution Started at: {execution_start_timestamp}") | |
| print(f"Execution Ended at: {execution_end_timestamp}") | |
| print(f"Total Execution Time: {total_execution_time:.4f} seconds") | |
| # Optional: Return results for potential further processing | |
| return { | |
| 'start_time': execution_start_timestamp, | |
| 'end_time': execution_end_timestamp, | |
| 'total_execution_time': total_execution_time, | |
| 'query_results': query_results | |
| } | |
| except Exception as e: | |
| print(f"Client initialization error: {str(e)}") | |
| print("Possible issues:") | |
| print("1. Invalid or missing Hugging Face token") | |
| print("2. Network connectivity problems") | |
| print("3. Model access restrictions") | |
| print("\nPlease check your .env file and Hugging Face token.") | |
| # Calculate total execution time even in case of error | |
| total_end_time = time.time() | |
| total_execution_time = total_end_time - total_start_time | |
| return { | |
| 'error': str(e), | |
| 'total_execution_time': total_execution_time | |
| } | |
| if __name__ == "__main__": | |
| main() |