Commit c1c187b
Parent(s): 4f99f28
Replace manual HTTP requests with InferenceClient for better reliability and error handling
src/models_registry.py  CHANGED  (+24 -34)
@@ -5,10 +5,10 @@ Optimized for remote inference without local model loading.
 
 import yaml
 import os
-import requests
 from typing import List, Dict, Any, Optional
 from dataclasses import dataclass
 import sys
+from huggingface_hub import InferenceClient
 
 # Add src to path for imports
 sys.path.append('src')
@@ -70,48 +70,38 @@ class ModelsRegistry:
 
 
 class HuggingFaceInference:
-    """Interface for Hugging Face Inference API."""
+    """Interface for Hugging Face Inference API using InferenceClient."""
 
     def __init__(self, api_token: Optional[str] = None):
         self.api_token = api_token or os.getenv("HF_TOKEN")
-        self.api_url = "https://api-inference.huggingface.co/models"
+        # InferenceClient handles authentication automatically
+        self.client = InferenceClient(token=self.api_token)
 
     def generate(self, model_id: str, prompt: str, params: Dict[str, Any]) -> str:
         """Generate text using Hugging Face Inference API."""
-        headers = {}
-        if self.api_token:
-            headers["Authorization"] = f"Bearer {self.api_token}"
-
-        payload = {
-            "inputs": prompt,
-            "parameters": params
-        }
-
         try:
-            response = requests.post(
-                f"{self.api_url}/{model_id}",
-                headers=headers,
-                json=payload,
-                timeout=60
+            # Much simpler API call with InferenceClient!
+            result = self.client.text_generation(
+                prompt=prompt,
+                model=model_id,
+                max_new_tokens=params.get('max_new_tokens', 128),
+                temperature=params.get('temperature', 0.1),
+                top_p=params.get('top_p', 0.9),
+                return_full_text=False  # Only return the generated part
             )
-
-            if response.status_code != 200:
-                raise Exception(f"Hugging Face API error: {response.status_code} - {response.text}")
-
-            result = response.json()
-
-            # Handle different response formats
-            if isinstance(result, list) and len(result) > 0:
-                return result[0].get('generated_text', '')
-            elif isinstance(result, dict):
-                return result.get('generated_text', '')
-            else:
-                return str(result)
+
+            return result
 
-        except requests.exceptions.RequestException as e:
-            raise Exception(f"Request failed: {str(e)}")
-        except Exception as e:
-            raise Exception(f"Hugging Face API error: {str(e)}")
+        except Exception as e:
+            # InferenceClient provides better error messages
+            if "404" in str(e):
+                raise Exception(f"Model not found: {model_id}")
+            elif "401" in str(e):
+                raise Exception(f"Authentication failed - check HF_TOKEN")
+            elif "503" in str(e):
+                raise Exception(f"Model {model_id} is loading, please try again in a moment")
+            else:
+                raise Exception(f"Hugging Face API error: {str(e)}")
 
 
 class ModelInterface:
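For orientation, here is a minimal usage sketch of the class this commit rewrites. It is an assumption for illustration, not part of the commit: the import path relies on the file's own sys.path.append('src'), and the model ID and sampling values are placeholders.

# Hypothetical usage sketch (not part of the commit). Assumes HF_TOKEN is
# set in the environment and that src/ is on sys.path; the model ID and
# parameter values are illustrative placeholders.
from models_registry import HuggingFaceInference

hf = HuggingFaceInference()  # falls back to os.getenv("HF_TOKEN")

params = {
    "max_new_tokens": 64,   # forwarded to text_generation(max_new_tokens=...)
    "temperature": 0.1,     # low temperature for stable, repeatable output
    "top_p": 0.9,
}

text = hf.generate(
    model_id="mistralai/Mistral-7B-Instruct-v0.2",  # placeholder model ID
    prompt="Summarize what a model registry does in one sentence.",
    params=params,
)
print(text)

Because generate() reads max_new_tokens, temperature, and top_p out of the params dict with defaults, callers can pass a partial dict and still get sensible values.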
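The 503 branch in the new except block turns a cold-start ("model is loading") response into a retriable error message. A caller-side retry wrapper is one way to act on it; the sketch below is an assumed helper, not code from this Space.

import time

# Sketch (assumed helper, not in the commit): retry while the Inference API
# reports that the model is still loading, matching the message raised by
# the 503 branch of HuggingFaceInference.generate().
def generate_with_retry(hf, model_id, prompt, params, retries=3, wait_s=10.0):
    for attempt in range(retries):
        try:
            return hf.generate(model_id, prompt, params)
        except Exception as e:
            if "is loading" in str(e) and attempt < retries - 1:
                time.sleep(wait_s)  # give the endpoint time to warm up
            else:
                raise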