import contextlib
import base64
import io
import json

import torch
from PIL import Image

import open_clip
from reparam import reparameterize_model


class EndpointHandler:
    """Zero-shot image-classification endpoint built on MobileCLIP-B.

    Text embeddings for every candidate class are pre-computed once at
    startup from ``items.json``, so each request only pays for a single
    image forward pass plus a matrix multiply.
    """

    def __init__(self, path: str = ""):
        """Load the model and pre-compute normalized text features.

        Args:
            path: Directory containing ``items.json``, a list of objects
                with ``"id"``, ``"name"``, and ``"prompt"`` keys.
        """
        self.device = "cuda" if torch.cuda.is_available() else "cpu"

        # 1. Load the model (happens only once at startup).
        model, _, self.preprocess = open_clip.create_model_and_transforms(
            "MobileCLIP-B", pretrained='datacompdr'
        )
        model.eval()
        # Fuse re-parameterizable branches for faster inference.
        self.model = reparameterize_model(model)
        tokenizer = open_clip.get_tokenizer("MobileCLIP-B")

        self.model.to(self.device)
        # Run in half precision on GPU to match the optimized local script.
        if self.device == "cuda":
            self.model.to(torch.float16)

        # --- OPTIMIZATION: pre-compute text features from the JSON file ---
        # 2. Load the rich class definitions.
        with open(f"{path}/items.json", "r", encoding="utf-8") as f:
            class_definitions = json.load(f)

        # 3. 'prompt' drives the embedding; 'id'/'name' are kept to
        #    structure the response later.
        prompts = [item['prompt'] for item in class_definitions]
        self.class_ids = [item['id'] for item in class_definitions]
        self.class_names = [item['name'] for item in class_definitions]

        # 4. Tokenize and encode all prompts at once; L2-normalize so the
        #    dot product in __call__ is a cosine similarity.
        with torch.no_grad():
            text_tokens = tokenizer(prompts).to(self.device)
            self.text_features = self.model.encode_text(text_tokens)
            self.text_features /= self.text_features.norm(dim=-1, keepdim=True)

    def __call__(self, data):
        """Score one base64-encoded image against the pre-computed classes.

        Args:
            data: Request payload; the image is read from
                ``data["inputs"]["image"]`` (or ``data["image"]`` when no
                ``"inputs"`` wrapper is present).

        Returns:
            A list of ``{"id", "label", "score"}`` dicts sorted by
            descending score.
        """
        # The payload only needs the image now.
        payload = data.get("inputs", data)
        img_b64 = payload["image"]

        # ---------------- decode image ----------------
        image = Image.open(io.BytesIO(base64.b64decode(img_b64))).convert("RGB")
        img_tensor = self.preprocess(image).unsqueeze(0).to(self.device)
        # The preprocessor may emit float32; match the model dtype on GPU.
        if self.device == "cuda":
            img_tensor = img_tensor.to(torch.float16)

        # ---------------- forward pass (very fast) ----------------
        with torch.no_grad():
            # 1. Encode only the image; text features are cached.
            img_feat = self.model.encode_image(img_tensor)
            img_feat /= img_feat.norm(dim=-1, keepdim=True)
            # 2. Similarity against the pre-computed text features.
            probs = (100 * img_feat @ self.text_features.T).softmax(dim=-1)[0]

        # 3. Combine results with the stored class IDs and names and turn
        #    the probability tensor into plain Python floats.
        results = zip(self.class_ids, self.class_names, probs.cpu().tolist())

        # 4. Sorted list of dicts for a clean JSON response.
        return sorted(
            [{"id": i, "label": name, "score": float(p)} for i, name, p in results],
            key=lambda x: x["score"],
            reverse=True,
        )