finhdev committed
Commit 2f51b26 · verified · 1 parent: 9bde8e9

Update handler.py

Files changed (1):
1. handler.py  +64 -39
handler.py CHANGED
@@ -1,4 +1,4 @@
- import contextlib, io, base64, torch
+ import contextlib, io, base64, torch, json
  from PIL import Image
  import open_clip
  from reparam import reparameterize_model
@@ -7,71 +7,92 @@ class EndpointHandler:
      def __init__(self, path: str = ""):
          self.device = "cuda" if torch.cuda.is_available() else "cpu"

-         # Fix 1: Load weights directly from the web, just like local script
-         # This guarantees the weights are identical.
+         # 1. Load the model (happens only once at startup)
          model, _, self.preprocess = open_clip.create_model_and_transforms(
              "MobileCLIP-B", pretrained='datacompdr'
          )
          model.eval()
-         self.model = reparameterize_model(model) # fuse branches
-
-         self.tokenizer = open_clip.get_tokenizer("MobileCLIP-B")
+         self.model = reparameterize_model(model)
+         tokenizer = open_clip.get_tokenizer("MobileCLIP-B")
          self.model.to(self.device)

-         # Fix 2: Explicitly set model to half-precision if on CUDA
-         # This matches the behavior of torch.set_default_dtype(torch.float16)
          if self.device == "cuda":
              self.model.to(torch.float16)

+         # --- OPTIMIZATION: Pre-compute text features from your JSON ---
+
+         # 2. Load your rich class definitions from the file
+         with open(f"{path}/classes.json", "r", encoding="utf-8") as f:
+             class_definitions = json.load(f)
+
+         # 3. Prepare the data for encoding and for the final response
+         #    - Use the 'prompt' field for creating the embeddings
+         #    - Keep 'name' and 'id' to structure the response later
+         prompts = [item['prompt'] for item in class_definitions]
+         self.class_ids = [item['id'] for item in class_definitions]
+         self.class_names = [item['name'] for item in class_definitions]
+
+         # 4. Tokenize and encode all prompts at once
+         with torch.no_grad():
+             text_tokens = tokenizer(prompts).to(self.device)
+             self.text_features = self.model.encode_text(text_tokens)
+             self.text_features /= self.text_features.norm(dim=-1, keepdim=True)
+
      def __call__(self, data):
+         # The payload only needs the image now
          payload = data.get("inputs", data)
          img_b64 = payload["image"]
-         labels = payload.get("candidate_labels", [])
-         if not labels:
-             return {"error": "candidate_labels list is empty"}

-         # ---------------- decode inputs ----------------
+         # ---------------- decode image ----------------
          image = Image.open(io.BytesIO(base64.b64decode(img_b64))).convert("RGB")
          img_tensor = self.preprocess(image).unsqueeze(0).to(self.device)

-         # The preprocessor might output float32, so ensure tensor matches model dtype
          if self.device == "cuda":
              img_tensor = img_tensor.to(torch.float16)

-         text_tokens = self.tokenizer(labels).to(self.device)
-
-         # ---------------- forward pass -----------------
-         # No need for autocast if everything is already float16
+         # ---------------- forward pass (very fast) -----------------
          with torch.no_grad():
+             # 1. Encode only the image
              img_feat = self.model.encode_image(img_tensor)
-             txt_feat = self.model.encode_text(text_tokens)
              img_feat /= img_feat.norm(dim=-1, keepdim=True)
-             txt_feat /= txt_feat.norm(dim=-1, keepdim=True)
-             probs = (100 * img_feat @ txt_feat.T).softmax(dim=-1)[0].cpu().tolist()

-         return [
-             {"label": l, "score": float(p)}
-             for l, p in sorted(zip(labels, probs), key=lambda x: x[1], reverse=True)
-         ]
+         # 2. Compute similarity against the pre-computed text features
+         probs = (100 * img_feat @ self.text_features.T).softmax(dim=-1)[0]
+
+         # 3. Combine the results with your stored class IDs and names
+         #    and convert the tensor of probabilities to a list of floats
+         results = zip(self.class_ids, self.class_names, probs.cpu().tolist())
+
+         # 4. Create a sorted list of dictionaries for a clean JSON response
+         return sorted(
+             [{"id": i, "label": name, "score": float(p)} for i, name, p in results],
+             key=lambda x: x["score"],
+             reverse=True
+         )
  # import contextlib, io, base64, torch
  # from PIL import Image
  # import open_clip
  # from reparam import reparameterize_model

-
  # class EndpointHandler:
  #     def __init__(self, path: str = ""):
- #         # You can also pass pretrained='datacompdr' to let OpenCLIP download
- #         weights = f"{path}/mobileclip_b.pt"
- #         self.model, _, self.preprocess = open_clip.create_model_and_transforms(
- #             "MobileCLIP-B", pretrained=weights
+ #         self.device = "cuda" if torch.cuda.is_available() else "cpu"
+
+ #         # Fix 1: Load weights directly from the web, just like local script
+ #         # This guarantees the weights are identical.
+ #         model, _, self.preprocess = open_clip.create_model_and_transforms(
+ #             "MobileCLIP-B", pretrained='datacompdr'
  #         )
- #         self.model.eval()
- #         self.model = reparameterize_model(self.model) # *** fuse branches ***
+ #         model.eval()
+ #         self.model = reparameterize_model(model) # fuse branches

- #         self.device = "cuda" if torch.cuda.is_available() else "cpu"
- #         self.model.to(self.device)
  #         self.tokenizer = open_clip.get_tokenizer("MobileCLIP-B")
+ #         self.model.to(self.device)
+
+ #         # Fix 2: Explicitly set model to half-precision if on CUDA
+ #         # This matches the behavior of torch.set_default_dtype(torch.float16)
+ #         if self.device == "cuda":
+ #             self.model.to(torch.float16)

  #     def __call__(self, data):
  #         payload = data.get("inputs", data)
@@ -82,23 +103,27 @@ class EndpointHandler:

  #         # ---------------- decode inputs ----------------
  #         image = Image.open(io.BytesIO(base64.b64decode(img_b64))).convert("RGB")
- #         img_tensor = self.preprocess(image).unsqueeze(0).to(self.device)
+ #         img_tensor = self.preprocess(image).unsqueeze(0).to(self.device)
+
+ #         # The preprocessor might output float32, so ensure tensor matches model dtype
+ #         if self.device == "cuda":
+ #             img_tensor = img_tensor.to(torch.float16)
+
  #         text_tokens = self.tokenizer(labels).to(self.device)

  #         # ---------------- forward pass -----------------
- #         autocast_ctx = (
- #             torch.cuda.amp.autocast if self.device.startswith("cuda") else contextlib.nullcontext
- #         )
- #         with torch.no_grad(), autocast_ctx():
+ #         # No need for autocast if everything is already float16
+ #         with torch.no_grad():
  #             img_feat = self.model.encode_image(img_tensor)
  #             txt_feat = self.model.encode_text(text_tokens)
  #             img_feat /= img_feat.norm(dim=-1, keepdim=True)
  #             txt_feat /= txt_feat.norm(dim=-1, keepdim=True)
- #             probs = (100 * img_feat @ txt_feat.T).softmax(dim=-1)[0].tolist()
+ #             probs = (100 * img_feat @ txt_feat.T).softmax(dim=-1)[0].cpu().tolist()

  #         return [
  #             {"label": l, "score": float(p)}
  #             for l, p in sorted(zip(labels, probs), key=lambda x: x[1], reverse=True)
  #         ]

+
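
About classes.json: the rewritten __init__ reads {path}/classes.json once at startup and expects each entry to provide a 'prompt' field (encoded into the text embeddings) plus 'id' and 'name' fields (echoed back in the response). The file itself is not part of this commit, so the entries below are invented placeholders; a compatible file would be shaped roughly like this:

[
  {"id": 0, "name": "cat", "prompt": "a photo of a cat"},
  {"id": 1, "name": "dog", "prompt": "a photo of a dog"}
]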
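
Because the text features are now pre-computed once, each request only pays for a single image encode. A minimal smoke test of the new handler might look like the sketch below; it assumes the repository root contains classes.json and the reparam module, that open_clip can fetch the 'datacompdr' weights, and that test.jpg is any local image (these file names are illustrative, not part of the commit):

import base64
from handler import EndpointHandler

handler = EndpointHandler(path=".")  # loads the model and encodes all prompts once

with open("test.jpg", "rb") as f:  # any local image file
    img_b64 = base64.b64encode(f.read()).decode("utf-8")

# candidate_labels is no longer needed; the classes come from classes.json
scores = handler({"inputs": {"image": img_b64}})
print(scores[0])  # best match, e.g. {"id": ..., "label": ..., "score": ...}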