finhdev committed
Commit c5d457b · verified · 1 parent: 9106e2d

Update handler.py

Files changed (1): handler.py (+40 -44)
handler.py CHANGED
@@ -1,23 +1,27 @@
-
 import contextlib, io, base64, torch
 from PIL import Image
 import open_clip
 from reparam import reparameterize_model
 
-
 class EndpointHandler:
     def __init__(self, path: str = ""):
-        # You can also pass pretrained='datacompdr' to let OpenCLIP download
-        weights = f"{path}/mobileclip_b.pt"
-        self.model, _, self.preprocess = open_clip.create_model_and_transforms(
-            "MobileCLIP-B", pretrained=weights
+        self.device = "cuda" if torch.cuda.is_available() else "cpu"
+
+        # Fix 1: Load weights directly from the web, just like the local script.
+        # This guarantees the weights are identical.
+        model, _, self.preprocess = open_clip.create_model_and_transforms(
+            "MobileCLIP-B", pretrained='datacompdr'
         )
-        self.model.eval()
-        self.model = reparameterize_model(self.model)  # *** fuse branches ***
+        model.eval()
+        self.model = reparameterize_model(model)  # fuse branches
 
-        self.device = "cuda" if torch.cuda.is_available() else "cpu"
-        self.model.to(self.device)
         self.tokenizer = open_clip.get_tokenizer("MobileCLIP-B")
+        self.model.to(self.device)
+
+        # Fix 2: Explicitly set the model to half precision if on CUDA.
+        # This matches the behavior of torch.set_default_dtype(torch.float16).
+        if self.device == "cuda":
+            self.model.to(torch.float16)
 
     def __call__(self, data):
         payload = data.get("inputs", data)
@@ -28,81 +32,73 @@ class EndpointHandler:
 
         # ---------------- decode inputs ----------------
         image = Image.open(io.BytesIO(base64.b64decode(img_b64))).convert("RGB")
-        img_tensor = self.preprocess(image).unsqueeze(0).to(self.device)
+        img_tensor = self.preprocess(image).unsqueeze(0).to(self.device)
+
+        # The preprocessor might output float32, so ensure the tensor matches the model dtype.
+        if self.device == "cuda":
+            img_tensor = img_tensor.to(torch.float16)
+
         text_tokens = self.tokenizer(labels).to(self.device)
 
         # ---------------- forward pass -----------------
-        autocast_ctx = (
-            torch.cuda.amp.autocast if self.device.startswith("cuda") else contextlib.nullcontext
-        )
-        with torch.no_grad(), autocast_ctx():
+        # No need for autocast if everything is already float16.
+        with torch.no_grad():
             img_feat = self.model.encode_image(img_tensor)
             txt_feat = self.model.encode_text(text_tokens)
             img_feat /= img_feat.norm(dim=-1, keepdim=True)
             txt_feat /= txt_feat.norm(dim=-1, keepdim=True)
-            probs = (100 * img_feat @ txt_feat.T).softmax(dim=-1)[0].tolist()
+            probs = (100 * img_feat @ txt_feat.T).softmax(dim=-1)[0].cpu().tolist()
 
         return [
             {"label": l, "score": float(p)}
             for l, p in sorted(zip(labels, probs), key=lambda x: x[1], reverse=True)
         ]
-
-
-# # handler.py (repo root)
-# import io, base64, torch
+# import contextlib, io, base64, torch
 # from PIL import Image
 # import open_clip
+# from reparam import reparameterize_model
 
-# class EndpointHandler:
-#     """
-#     Zero-shot classifier for MobileCLIP-B (OpenCLIP).
-
-#     Expected client JSON *to the endpoint*:
-#     {
-#       "inputs": {
-#         "image": "<base64 PNG/JPEG>",
-#         "candidate_labels": ["cat", "dog", ...]
-#       }
-#     }
-#     """
 
+# class EndpointHandler:
 #     def __init__(self, path: str = ""):
+#         # You can also pass pretrained='datacompdr' to let OpenCLIP download
 #         weights = f"{path}/mobileclip_b.pt"
 #         self.model, _, self.preprocess = open_clip.create_model_and_transforms(
 #             "MobileCLIP-B", pretrained=weights
 #         )
 #         self.model.eval()
+#         self.model = reparameterize_model(self.model)  # *** fuse branches ***
 
-#         self.tokenizer = open_clip.get_tokenizer("MobileCLIP-B")
 #         self.device = "cuda" if torch.cuda.is_available() else "cpu"
 #         self.model.to(self.device)
+#         self.tokenizer = open_clip.get_tokenizer("MobileCLIP-B")
 
 #     def __call__(self, data):
-#         # ── unwrap Hugging Face's `inputs` envelope ───────────
 #         payload = data.get("inputs", data)
-
 #         img_b64 = payload["image"]
 #         labels = payload.get("candidate_labels", [])
 #         if not labels:
 #             return {"error": "candidate_labels list is empty"}
 
-#         # Decode & preprocess image
+#         # ---------------- decode inputs ----------------
 #         image = Image.open(io.BytesIO(base64.b64decode(img_b64))).convert("RGB")
-#         img_tensor = self.preprocess(image).unsqueeze(0).to(self.device)
-
-#         # Tokenise labels
+#         img_tensor = self.preprocess(image).unsqueeze(0).to(self.device)
 #         text_tokens = self.tokenizer(labels).to(self.device)
 
-#         # Forward pass
-#         with torch.no_grad(), torch.cuda.amp.autocast():
+#         # ---------------- forward pass -----------------
+#         autocast_ctx = (
+#             torch.cuda.amp.autocast if self.device.startswith("cuda") else contextlib.nullcontext
+#         )
+#         with torch.no_grad(), autocast_ctx():
 #             img_feat = self.model.encode_image(img_tensor)
 #             txt_feat = self.model.encode_text(text_tokens)
-#             img_feat = img_feat / img_feat.norm(dim=-1, keepdim=True)
-#             txt_feat = txt_feat / txt_feat.norm(dim=-1, keepdim=True)
+#             img_feat /= img_feat.norm(dim=-1, keepdim=True)
+#             txt_feat /= txt_feat.norm(dim=-1, keepdim=True)
 #             probs = (100 * img_feat @ txt_feat.T).softmax(dim=-1)[0].tolist()
 
 #         return [
 #             {"label": l, "score": float(p)}
 #             for l, p in sorted(zip(labels, probs), key=lambda x: x[1], reverse=True)
 #         ]
+
+
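A note on Fix 2: once the model is cast to float16, any float32 input fails at the first matmul, which is why the handler casts img_tensor explicitly rather than relying on autocast. A minimal sketch of the failure mode, assuming a CUDA device is available (the Linear layer below is a toy stand-in, not part of the handler):

import torch

# Toy stand-in for the half-precision model (assumption: CUDA is available).
layer = torch.nn.Linear(4, 2).to(device="cuda", dtype=torch.float16)
x = torch.randn(1, 4, device="cuda")  # float32 by default, like the preprocess output

try:
    layer(x)  # Half weights vs. Float input -> RuntimeError
except RuntimeError as err:
    print("dtype mismatch:", err)

print(layer(x.to(torch.float16)))  # succeeds once the input matches the model dtype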
 
 
 
 
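For reference, a minimal client-side sketch of the JSON contract the handler expects, mirroring the commented-out docstring in the diff above; the endpoint URL, token, and image file are placeholders, not part of this commit:

import base64, requests

API_URL = "https://your-endpoint.endpoints.huggingface.cloud"  # placeholder
HEADERS = {"Authorization": "Bearer hf_xxx"}                   # placeholder token

with open("cat.jpg", "rb") as f:  # any local image
    img_b64 = base64.b64encode(f.read()).decode("utf-8")

payload = {
    "inputs": {
        "image": img_b64,
        "candidate_labels": ["cat", "dog", "bird"],
    }
}

resp = requests.post(API_URL, headers=HEADERS, json=payload)
print(resp.json())  # e.g. [{"label": "cat", "score": 0.97}, ...]

The handler returns the candidate labels sorted by score, highest first.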