Spaces:

ybchen928
/

oncall-guide-ai

Sleeping

App Files Community

YanBoChen committed on Jul 30

Commit

4c919d2

1 Parent(s): acc25ea

feat(llm_clients): enhance MeditronClient to support local model loading and improve error handling

Browse files

Files changed (1) hide show

src/llm_clients.py +117 -39

src/llm_clients.py CHANGED Viewed

@@ -10,6 +10,8 @@ Date: 2025-07-29
 import logging
 import os
 from typing import Dict, Optional
 from huggingface_hub import InferenceClient
 from dotenv import load_dotenv
@@ -19,34 +21,72 @@ load_dotenv()
 class MeditronClient:
     def __init__(
         self,
-        model: str = "TheBloke/meditron-7B-GPTQ",
         timeout: float = 30.0
     ):
         """
-        Initialize Meditron API client for medical query processing.
         Args:
-            model: Hugging Face model name
-            timeout: API call timeout duration (not used in InferenceClient)
         Warning: This model should not be used for professional medical advice.
         """
-        # Get HF token from environment variable
-        hf_token = os.getenv('HF_TOKEN')
-        if not hf_token:
-            raise ValueError(
-                "HF_TOKEN not found in environment variables. "
-                "Please set HF_TOKEN in your .env file or environment."
-            )
-        self.client = InferenceClient(model=model, token=hf_token)
         self.logger = logging.getLogger(__name__)
         self.timeout = timeout
-        self.logger.warning(
-            "Meditron Model: Research tool only. "
-            "Not for professional medical diagnosis."
-        )
-        self.logger.info("Meditron client initialized with HF token")
     def analyze_medical_query(
         self,
@@ -60,7 +100,7 @@ class MeditronClient:
         Args:
             query: Medical query text
             max_tokens: Maximum tokens to generate
-            timeout: Specific API call timeout (not used in InferenceClient)
         Returns:
             Extracted medical condition information
@@ -78,34 +118,67 @@ DO NOT provide medical advice.
 <|im_start|>assistant
 """
-            self.logger.info(f"Calling Meditron API with query: {query}")
-            # Remove timeout parameter as InferenceClient doesn't support it
-            response = self.client.text_generation(
-                prompt,
-                max_new_tokens=max_tokens,
-                temperature=0.7,
-                top_k=50
-            )
-            self.logger.info(f"Received response: {response}")
             # Extract condition from response
-            extracted_condition = self._extract_condition(response)
             return {
                 'extracted_condition': extracted_condition,
                 'confidence': 0.8,
-                'raw_response': response
             }
         except Exception as e:
-            self.logger.error(f"Meditron API query error: {str(e)}")
             self.logger.error(f"Error type: {type(e).__name__}")
             return {
                 'extracted_condition': '',
                 'confidence': 0,
-                'error': str(e)
             }
     def _extract_condition(self, response: str) -> str:
@@ -135,7 +208,12 @@ def main():
     Test Meditron client functionality
     """
     try:
-        client = MeditronClient()
         test_queries = [
             "patient experiencing chest pain",
             "sudden weakness on one side",
@@ -154,12 +232,12 @@ def main():
     except Exception as e:
         print(f"Client initialization error: {str(e)}")
         print("This might be due to:")
-        print("1. Missing Hugging Face API token")
-        print("2. Network connectivity issues")
-        print("3. Model access permissions")
         print("\nTo fix:")
-        print("1. Set HF_TOKEN environment variable")
-        print("2. Or login with: huggingface-cli login")
 if __name__ == "__main__":
     main()

 import logging
 import os
 from typing import Dict, Optional
+import torch
+from transformers import AutoTokenizer, AutoModelForCausalLM
 from huggingface_hub import InferenceClient
 from dotenv import load_dotenv
 class MeditronClient:
     def __init__(
         self,
+        model_name: str = "TheBloke/meditron-7B-GPTQ",
+        local_model_path: Optional[str] = None,
+        use_local: bool = False,
         timeout: float = 30.0
     ):
         """
+        Initialize Meditron client for medical query processing.
         Args:
+            model_name: Hugging Face model name
+            local_model_path: Path to local model files
+            use_local: Flag to use local model
+            timeout: API call timeout duration
         Warning: This model should not be used for professional medical advice.
         """
         self.logger = logging.getLogger(__name__)
         self.timeout = timeout
+        self.use_local = use_local
+        if use_local:
+            if not local_model_path:
+                raise ValueError("local_model_path must be provided when use_local is True")
+            try:
+                # Load local model using Hugging Face transformers
+                self.tokenizer = AutoTokenizer.from_pretrained(
+                    model_name,
+                    local_files_only=True,
+                    cache_dir=local_model_path
+                )
+                self.model = AutoModelForCausalLM.from_pretrained(
+                    model_name,
+                    local_files_only=True,
+                    cache_dir=local_model_path,
+                    device_map="auto",
+                    torch_dtype=torch.float16
+                )
+                self.logger.info(f"Local Meditron model loaded from: {local_model_path}")
+                self.logger.warning(
+                    "Meditron Model: Research tool only. "
+                    "Not for professional medical diagnosis."
+                )
+            except Exception as e:
+                self.logger.error(f"Failed to load local model: {str(e)}")
+                raise ValueError(f"Failed to initialize local Meditron client: {str(e)}")
+        else:
+            # Existing InferenceClient logic
+            hf_token = os.getenv('HF_TOKEN')
+            if not hf_token:
+                raise ValueError(
+                    "HF_TOKEN not found in environment variables. "
+                    "Please set HF_TOKEN in your .env file or environment."
+                )
+            try:
+                self.client = InferenceClient(model=model_name, token=hf_token)
+                self.logger.info(f"Meditron client initialized with model: {model_name}")
+                self.logger.warning(
+                    "Meditron Model: Research tool only. "
+                    "Not for professional medical diagnosis."
+                )
+            except Exception as e:
+                self.logger.error(f"Failed to initialize InferenceClient: {str(e)}")
+                raise ValueError(f"Failed to initialize Meditron client: {str(e)}")
     def analyze_medical_query(
         self,
         Args:
             query: Medical query text
             max_tokens: Maximum tokens to generate
+            timeout: Specific API call timeout
         Returns:
             Extracted medical condition information
 <|im_start|>assistant
 """
+            self.logger.info(f"Calling Meditron with query: {query}")
+            if self.use_local:
+                # Local model inference
+                input_ids = self.tokenizer(prompt, return_tensors='pt').input_ids.to(self.model.device)
+                response = self.model.generate(
+                    input_ids,
+                    max_new_tokens=max_tokens,
+                    temperature=0.7,
+                    do_sample=True,
+                    top_k=50
+                )
+                response_text = self.tokenizer.decode(response[0], skip_special_tokens=True)
+                self.logger.info(f"Local model response: {response_text}")
+            else:
+                # InferenceClient inference
+                self.logger.info(f"Using model: {self.client.model}")
+                # Test API connection first
+                try:
+                    test_response = self.client.text_generation(
+                        "Hello",
+                        max_new_tokens=5,
+                        temperature=0.7,
+                        top_k=50
+                    )
+                    self.logger.info("API connection test successful")
+                except Exception as test_error:
+                    self.logger.error(f"API connection test failed: {str(test_error)}")
+                    return {
+                        'extracted_condition': '',
+                        'confidence': 0,
+                        'error': f"API connection failed: {str(test_error)}"
+                    }
+                response_text = self.client.text_generation(
+                    prompt,
+                    max_new_tokens=max_tokens,
+                    temperature=0.7,
+                    top_k=50
+                )
             # Extract condition from response
+            extracted_condition = self._extract_condition(response_text)
             return {
                 'extracted_condition': extracted_condition,
                 'confidence': 0.8,
+                'raw_response': response_text
             }
         except Exception as e:
+            self.logger.error(f"Meditron query error: {str(e)}")
             self.logger.error(f"Error type: {type(e).__name__}")
+            self.logger.error(f"Error details: {repr(e)}")
             return {
                 'extracted_condition': '',
                 'confidence': 0,
+                'error': f"{type(e).__name__}: {str(e)}"
             }
     def _extract_condition(self, response: str) -> str:
     Test Meditron client functionality
     """
     try:
+        # Test local model loading
+        client = MeditronClient(
+            local_model_path="/Users/yanbochen/Documents/Life in Canada/CS study related/*Student Course, Guide/CS7180 GenAI/FinalProject_git_copy/models/cache/meditron-7B-GPTQ",
+            use_local=True
+        )
         test_queries = [
             "patient experiencing chest pain",
             "sudden weakness on one side",
     except Exception as e:
         print(f"Client initialization error: {str(e)}")
         print("This might be due to:")
+        print("1. Incorrect local model path")
+        print("2. Missing dependencies")
+        print("3. Hardware limitations")
         print("\nTo fix:")
+        print("1. Verify local model path")
+        print("2. Install required dependencies")
 if __name__ == "__main__":
     main()