finhdev
/

testmobileclip

mobileclip

Model card Files and versions

xet

Community

finhdev committed on Jul 29, 2025

Commit

2fb4fd2

verified ·

1 Parent(s): 3bad150

Update handler.py

Browse files

Files changed (1) hide show

handler.py +18 -14

handler.py CHANGED Viewed

@@ -1,32 +1,36 @@
 # handler.py  (repo root)
 import io, base64, torch, open_clip
 from PIL import Image
-from mobileclip.modules.common.mobileone import reparameterize_model             # optional
 class EndpointHandler:
     """
-    MobileCLIP‑B ('datacompdr') · text‑embedding cache.
-    Expects: {
-      "inputs": {
-        "image": "<base64>",
-        "candidate_labels": ["a photo of a cat", ...]
       }
-    }
     """
     def __init__(self, path=""):
-        # -- Load MobileCLIP‑B checkpoint identical to local run -------------
         self.model, _, self.preprocess = open_clip.create_model_and_transforms(
             "mobileclip_b", pretrained="datacompdr"
         )
-        self.model = reparameterize_model(self.model).eval()   # matches local pipeline
         self.tokenizer = open_clip.get_tokenizer("mobileclip_b")
         self.device = "cuda" if torch.cuda.is_available() else "cpu"
         self.model.to(self.device)
-        self.cache: dict[str, torch.Tensor] = {}               # label → embedding
     def __call__(self, data):
         payload = data.get("inputs", data)
@@ -35,11 +39,11 @@ class EndpointHandler:
         if not labels:
             return {"error": "candidate_labels list is empty"}
-        # -------- image preprocessing --------------------------------------
         img = Image.open(io.BytesIO(base64.b64decode(img_b64))).convert("RGB")
         img_t = self.preprocess(img).unsqueeze(0).to(self.device)
-        # -------- text embeddings with cache -------------------------------
         new = [l for l in labels if l not in self.cache]
         if new:
             tok = self.tokenizer(new).to(self.device)
@@ -50,7 +54,7 @@ class EndpointHandler:
                 self.cache[l] = e
         txt_t = torch.stack([self.cache[l] for l in labels])
-        # -------- forward & softmax ----------------------------------------
         with torch.no_grad(), torch.cuda.amp.autocast():
             img_f = self.model.encode_image(img_t)
             img_f = img_f / img_f.norm(dim=-1, keepdim=True)

 # handler.py  (repo root)
 import io, base64, torch, open_clip
 from PIL import Image
+# optional: from open_clip import fuse_conv_bn_sequential   # if you want re‑param
 class EndpointHandler:
     """
+    MobileCLIP‑B ('datacompdr') zero‑shot classifier with per‑process
+    text‑embedding cache.
+    Expected client JSON:
+      {
+        "inputs": {
+          "image": "<base64 PNG/JPEG>",
+          "candidate_labels": ["a photo of a cat", ...]
+        }
       }
     """
     def __init__(self, path=""):
+        # Load the exact weights your local run uses
         self.model, _, self.preprocess = open_clip.create_model_and_transforms(
             "mobileclip_b", pretrained="datacompdr"
         )
+        # Optional: fuse conv+bn for speed
+        # self.model = fuse_conv_bn_sequential(self.model).eval()
+        self.model.eval()
         self.tokenizer = open_clip.get_tokenizer("mobileclip_b")
         self.device = "cuda" if torch.cuda.is_available() else "cpu"
         self.model.to(self.device)
+        self.cache: dict[str, torch.Tensor] = {}   # prompt → embedding
     def __call__(self, data):
         payload = data.get("inputs", data)
         if not labels:
             return {"error": "candidate_labels list is empty"}
+        # Image → tensor
         img = Image.open(io.BytesIO(base64.b64decode(img_b64))).convert("RGB")
         img_t = self.preprocess(img).unsqueeze(0).to(self.device)
+        # Text embeddings with cache
         new = [l for l in labels if l not in self.cache]
         if new:
             tok = self.tokenizer(new).to(self.device)
                 self.cache[l] = e
         txt_t = torch.stack([self.cache[l] for l in labels])
+        # Forward
         with torch.no_grad(), torch.cuda.amp.autocast():
             img_f = self.model.encode_image(img_t)
             img_f = img_f / img_f.norm(dim=-1, keepdim=True)