Krishna Indukuri committed on
Commit e031746 · verified · 1 parent: a026ca0

Upload handler.py

Files changed (1)
  1. handler.py +155 -0
handler.py ADDED
import os
import json
import base64
from io import BytesIO

import torch
from PIL import Image

from custom_st import Transformer


class ModelHandler:
    """
    Custom handler for the embedding model, built on the Transformer
    class from custom_st.py.
    """

    def __init__(self):
        self.initialized = False
        self.model = None
        self.processor = None
        self.device = None
        self.default_task = "retrieval"  # Default task; can be overridden in initialize
        self.max_seq_length = 8192  # Default max sequence length

    def initialize(self, context):
        """
        Initialize the model and processor.
        """
        self.initialized = True

        # Get the model directory from the serving context
        properties = context.system_properties
        model_dir = properties.get("model_dir")
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        # Load overrides from config.json if it exists
        config_path = os.path.join(model_dir, "config.json")
        if os.path.exists(config_path):
            with open(config_path, "r") as f:
                config = json.load(f)
            self.default_task = config.get("default_task", self.default_task)
            self.max_seq_length = config.get("max_seq_length", self.max_seq_length)

        # Initialize the model
        self.model = Transformer(
            model_name_or_path=model_dir,
            max_seq_length=self.max_seq_length,
            model_args={"default_task": self.default_task},
        )
        self.model.model.to(self.device)
        self.model.model.eval()

        # Reuse the processor created by the Transformer wrapper
        self.processor = self.model.processor

    def preprocess(self, data):
        """
        Convert incoming request bodies into model features.
        """
        inputs = []

        # Extract and decode each request body
        for row in data:
            body = row.get("body", {})
            if isinstance(body, (bytes, bytearray)):
                body = json.loads(body.decode("utf-8"))
            elif isinstance(body, str):
                body = json.loads(body)

            if not body:
                # Empty body: skip this row rather than aborting the whole batch
                continue

            # Handle the supported input formats: "inputs" (string or list),
            # "text" (string), and "image" (data URL, URL, or file path)
            if "inputs" in body:
                raw_inputs = body["inputs"]
                if isinstance(raw_inputs, str):
                    inputs.append(raw_inputs)
                elif isinstance(raw_inputs, list):
                    inputs.extend(raw_inputs)
            elif "text" in body:
                inputs.append(body["text"])
            elif "image" in body:
                image_data = body["image"]
                if isinstance(image_data, str) and image_data.startswith("data:image"):
                    # Decode a base64 data URL into a PIL image
                    image_data = image_data.split(",")[1]
                    image = Image.open(BytesIO(base64.b64decode(image_data))).convert("RGB")
                    inputs.append(image)
                else:
                    inputs.append(image_data)  # URL or file path

        # Use the model's tokenize method to process the collected inputs
        if inputs:
            return self.model.tokenize(inputs)

        return []

    def inference(self, features):
        """
        Run inference on the processed features.
        """
        if not features:
            return {"embeddings": []}

        # Move all tensors to the target device
        for key, value in features.items():
            if isinstance(value, torch.Tensor):
                features[key] = value.to(self.device)

        with torch.no_grad():
            outputs = self.model.forward(features, task=self.default_task)

        embeddings = outputs.get("sentence_embedding", None)

        if embeddings is not None:
            # Convert to a plain list for JSON serialization
            return {"embeddings": embeddings.cpu().numpy().tolist()}
        return {"error": "No embeddings were generated"}

    def postprocess(self, inference_output):
        """
        Wrap the model output in the list format TorchServe expects.
        """
        return [inference_output]

    def handle(self, data, context):
        """
        Main handler function.
        """
        if not self.initialized:
            self.initialize(context)

        if not data:
            return [{"embeddings": []}]

        try:
            processed_data = self.preprocess(data)
            if not processed_data:
                return [{"embeddings": []}]

            inference_result = self.inference(processed_data)
            return self.postprocess(inference_result)
        except Exception as e:
            raise RuntimeError(f"Error processing request: {e}") from e


# Module-level service instance used by the TorchServe entry point
_service = ModelHandler()


def handle(data, context):
    """
    TorchServe handler entry point.
    """
    return _service.handle(data, context)
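
For reference, a minimal sketch of how this handler could be exercised outside of a running TorchServe instance. The MockContext class, the cat.jpg image, and the /path/to/model directory are hypothetical stand-ins for what TorchServe normally supplies; the request bodies match the "inputs" and "image" formats that preprocess accepts.

import base64
import json

from handler import handle  # the module defined above


class MockContext:
    """Hypothetical stand-in for the TorchServe context object."""

    def __init__(self, model_dir):
        self.system_properties = {"model_dir": model_dir}


# A text request using the "inputs" format
text_request = [{"body": json.dumps({"inputs": ["A photo of a cat"]})}]

# An image request using a base64 data URL (cat.jpg is hypothetical)
with open("cat.jpg", "rb") as f:
    data_url = "data:image/jpeg;base64," + base64.b64encode(f.read()).decode()
image_request = [{"body": json.dumps({"image": data_url})}]

context = MockContext("/path/to/model")  # hypothetical model directory
print(handle(text_request, context))   # -> [{"embeddings": [[...]]}]
print(handle(image_request, context))  # -> [{"embeddings": [[...]]}]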