dejanseo
/

universal-query-classifier-base

Safetensors

deberta-v2

Model card Files Files and versions

xet

Community

dejanseo committed on Jun 27

Commit

c6e6058

verified ·

1 Parent(s): 65018a5

Update handler.py

Browse files

Files changed (1) hide show

handler.py +90 -2

handler.py CHANGED Viewed

@@ -22,10 +22,22 @@ class EndpointHandler:
         self.classifier.bias.data = torch.tensor(head["scorer_bias"]).to(self.device)
         self.model.eval()
     def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
         payload = data.get("inputs", data)
         query = payload["query"]
         candidates = payload["candidates"]
         results = []
@@ -38,7 +50,7 @@ class EndpointHandler:
                     return_tensors="pt",
                     padding="max_length",
                     truncation=True,
-                    max_length=64
                 ).to(self.device)
                 out = self.model(**tokens)
@@ -51,3 +63,79 @@ class EndpointHandler:
                 })
         return sorted(results, key=lambda x: x["score"], reverse=True)

         self.classifier.bias.data = torch.tensor(head["scorer_bias"]).to(self.device)
         self.model.eval()
+        # Batch processing configuration
+        self.max_batch_size = 128  # Adjust based on GPU memory
+        self.max_length = 64
     def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
         payload = data.get("inputs", data)
+        # Check if this is batch processing (multiple queries) or single query
+        if "queries" in payload:
+            return self._process_batch(payload)
+        else:
+            return self._process_single(payload)
+    def _process_single(self, payload: Dict[str, Any]) -> List[Dict[str, Any]]:
+        """Original single query processing for backward compatibility"""
         query = payload["query"]
         candidates = payload["candidates"]
         results = []
                     return_tensors="pt",
                     padding="max_length",
                     truncation=True,
+                    max_length=self.max_length
                 ).to(self.device)
                 out = self.model(**tokens)
                 })
         return sorted(results, key=lambda x: x["score"], reverse=True)
+    def _process_batch(self, payload: Dict[str, Any]) -> List[List[Dict[str, Any]]]:
+        """True batch processing for multiple queries"""
+        queries = payload["queries"]
+        candidates = payload["candidates"]
+        # Create all query-candidate combinations
+        all_texts = []
+        query_indices = []
+        candidate_indices = []
+        for q_idx, query in enumerate(queries):
+            for c_idx, candidate in enumerate(candidates):
+                text = f"[QUERY] {query} [LABEL_NAME] {candidate['label']} [LABEL_DESCRIPTION] {candidate['description']}"
+                all_texts.append(text)
+                query_indices.append(q_idx)
+                candidate_indices.append(c_idx)
+        # Process in batches to avoid memory issues
+        all_scores = []
+        total_combinations = len(all_texts)
+        with torch.no_grad():
+            for i in range(0, total_combinations, self.max_batch_size):
+                batch_texts = all_texts[i:i + self.max_batch_size]
+                # Tokenize batch
+                tokens = self.tokenizer(
+                    batch_texts,
+                    return_tensors="pt",
+                    padding="max_length",
+                    truncation=True,
+                    max_length=self.max_length
+                ).to(self.device)
+                # Single forward pass for entire batch
+                out = self.model(**tokens)
+                cls = out.last_hidden_state[:, 0, :]
+                scores = torch.sigmoid(self.classifier(cls)).squeeze()
+                # Handle single item case
+                if scores.dim() == 0:
+                    scores = scores.unsqueeze(0)
+                all_scores.extend(scores.cpu().tolist())
+        # Reshape results back to query structure
+        results = []
+        for q_idx in range(len(queries)):
+            query_results = []
+            for c_idx, candidate in enumerate(candidates):
+                # Find the score for this query-candidate combination
+                combination_idx = q_idx * len(candidates) + c_idx
+                score = all_scores[combination_idx]
+                query_results.append({
+                    "label": candidate["label"],
+                    "description": candidate["description"],
+                    "score": round(score, 4)
+                })
+            # Sort by score for this query
+            query_results.sort(key=lambda x: x["score"], reverse=True)
+            results.append(query_results)
+        return results
+    def get_batch_stats(self) -> Dict[str, Any]:
+        """Return batch processing statistics"""
+        return {
+            "max_batch_size": self.max_batch_size,
+            "max_length": self.max_length,
+            "device": str(self.device),
+            "model_name": self.model.config.name_or_path if hasattr(self.model.config, 'name_or_path') else "unknown"
+        }