finhdev
/

testmobileclip

mobileclip

Model card Files Files and versions

xet

Community

finhdev committed on Jul 29, 2025

Commit

88b442b

verified ·

1 Parent(s): 440d3d5

Update handler.py

Browse files

Files changed (1) hide show

handler.py +21 -20

handler.py CHANGED Viewed

@@ -5,8 +5,10 @@ from PIL import Image
 class EndpointHandler:
     """
-    MobileCLIP‑B zero‑shot (OpenCLIP, pretrained = 'datacompdr')
-    Expects JSON:
       {
         "inputs": {
           "image": "<base64 PNG/JPEG>",
@@ -15,52 +17,51 @@ class EndpointHandler:
       }
     """
-    # ---------- initialisation (once per container) ----------
     def __init__(self, path=""):
-        # • Use the same checkpoint as your local workflow
-        # • No need for the local mobileclip_b.pt file
         self.model, _, self.preprocess = open_clip.create_model_and_transforms(
             "mobileclip_b", pretrained="datacompdr"
         )
         self.model.eval()
         self.tokenizer = open_clip.get_tokenizer("mobileclip_b")
         self.device = "cuda" if torch.cuda.is_available() else "cpu"
         self.model.to(self.device)
-        # Cache: {prompt -> 1×512 tensor}
-        self.label_cache: dict[str, torch.Tensor] = {}
-    # -------------------- inference --------------------------
     def __call__(self, data):
         payload = data.get("inputs", data)
         img_b64 = payload["image"]
         labels  = payload.get("candidate_labels", [])
         if not labels:
             return {"error": "candidate_labels list is empty"}
-        # image → tensor
-        image = Image.open(io.BytesIO(base64.b64decode(img_b64))).convert("RGB")
-        img_tensor = self.preprocess(image).unsqueeze(0).to(self.device)
-        # text → cached embeddings
-        missing = [l for l in labels if l not in self.label_cache]
         if missing:
             tok = self.tokenizer(missing).to(self.device)
             with torch.no_grad():
                 emb = self.model.encode_text(tok)
                 emb = emb / emb.norm(dim=-1, keepdim=True)
             for l, e in zip(missing, emb):
-                self.label_cache[l] = e
-        txt_feat = torch.stack([self.label_cache[l] for l in labels])
-        # forward
         with torch.no_grad(), torch.cuda.amp.autocast():
-            img_feat = self.model.encode_image(img_tensor)
-            img_feat = img_feat / img_feat.norm(dim=-1, keepdim=True)
-            probs = (100 * img_feat @ txt_feat.T).softmax(dim=-1)[0].tolist()
-        # sorted result
         return [
             {"label": l, "score": float(p)}
             for l, p in sorted(zip(labels, probs), key=lambda x: x[1], reverse=True)

 class EndpointHandler:
     """
+    MobileCLIP‑B (pretrained='datacompdr') zero‑shot classifier with
+    per‑container text‑embedding cache.
+    Client JSON:
       {
         "inputs": {
           "image": "<base64 PNG/JPEG>",
       }
     """
     def __init__(self, path=""):
+        # --- model & transforms ---------------------------------
         self.model, _, self.preprocess = open_clip.create_model_and_transforms(
             "mobileclip_b", pretrained="datacompdr"
         )
         self.model.eval()
+        # --- tokenizer & device --------------------------------
         self.tokenizer = open_clip.get_tokenizer("mobileclip_b")
         self.device = "cuda" if torch.cuda.is_available() else "cpu"
         self.model.to(self.device)
+        # --- text‑embedding cache ------------------------------
+        self.cache: dict[str, torch.Tensor] = {}
     def __call__(self, data):
+        # 1. unwrap HF 'inputs'
         payload = data.get("inputs", data)
         img_b64 = payload["image"]
         labels  = payload.get("candidate_labels", [])
         if not labels:
             return {"error": "candidate_labels list is empty"}
+        # 2. image -> tensor
+        img = Image.open(io.BytesIO(base64.b64decode(img_b64))).convert("RGB")
+        img_t = self.preprocess(img).unsqueeze(0).to(self.device)
+        # 3. text -> cached embeddings
+        missing = [l for l in labels if l not in self.cache]
         if missing:
             tok = self.tokenizer(missing).to(self.device)
             with torch.no_grad():
                 emb = self.model.encode_text(tok)
                 emb = emb / emb.norm(dim=-1, keepdim=True)
             for l, e in zip(missing, emb):
+                self.cache[l] = e
+        txt_t = torch.stack([self.cache[l] for l in labels])
+        # 4. forward
         with torch.no_grad(), torch.cuda.amp.autocast():
+            img_f = self.model.encode_image(img_t)
+            img_f = img_f / img_f.norm(dim=-1, keepdim=True)
+            probs = (100 * img_f @ txt_t.T).softmax(dim=-1)[0].tolist()
+        # 5. sorted response
         return [
             {"label": l, "score": float(p)}
             for l, p in sorted(zip(labels, probs), key=lambda x: x[1], reverse=True)