from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration
import torch
from config import Config
import cv2
import numpy as np
import math

# Simple global caching for the captioner
captioner_processor = None
captioner_model = None


def resize_image_to_1mp(image):
    """Resizes image to approx. 1MP (e.g., 1024x1024) preserving aspect ratio."""
    image = image.convert("RGB")
    w, h = image.size
    target_pixels = 1024 * 1024
    aspect_ratio = w / h

    # Calculate new dimensions so that new_w * new_h ~= target_pixels
    new_h = int((target_pixels / aspect_ratio) ** 0.5)
    new_w = int(new_h * aspect_ratio)

    # Round down to a multiple of 64 for efficiency (many vision/diffusion
    # models expect dimensions divisible by 64)
    new_w = (new_w // 64) * 64
    new_h = (new_h // 64) * 64

    if new_w == 0 or new_h == 0:
        new_w, new_h = 1024, 1024  # Fallback for extreme aspect ratios

    return image.resize((new_w, new_h), Image.LANCZOS)


def get_caption(image):
    """Generates a caption for the image if one isn't provided."""
    global captioner_processor, captioner_model

    # Lazily load the BLIP captioner on first use and keep it cached globally
    if captioner_model is None:
        print("Loading Captioner (BLIP)...")
        captioner_processor = BlipProcessor.from_pretrained(Config.CAPTIONER_REPO)
        captioner_model = BlipForConditionalGeneration.from_pretrained(
            Config.CAPTIONER_REPO
        ).to(Config.DEVICE)

    inputs = captioner_processor(image, return_tensors="pt").to(Config.DEVICE)
    out = captioner_model.generate(**inputs)
    caption = captioner_processor.decode(out[0], skip_special_tokens=True)
    return caption


def draw_kps(
    image_pil,
    kps,
    color_list=[(255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0), (255, 0, 255)],
):
    """Draws the 5 facial keypoints and their connecting limbs as a control image."""
    stickwidth = 4
    limbSeq = np.array([[0, 2], [1, 2], [3, 2], [4, 2]])
    kps = np.array(kps)

    w, h = image_pil.size
    out_img = np.zeros([h, w, 3])

    # Draw each limb (keypoint connected to the nose, index 2) as a filled ellipse
    for i in range(len(limbSeq)):
        index = limbSeq[i]
        color = color_list[index[0]]

        x = kps[index][:, 0]
        y = kps[index][:, 1]
        length = ((x[0] - x[1]) ** 2 + (y[0] - y[1]) ** 2) ** 0.5
        angle = math.degrees(math.atan2(y[0] - y[1], x[0] - x[1]))
        polygon = cv2.ellipse2Poly(
            (int(np.mean(x)), int(np.mean(y))),
            (int(length / 2), stickwidth),
            int(angle),
            0,
            360,
            1,
        )
        out_img = cv2.fillConvexPoly(out_img.copy(), polygon, color)
    out_img = (out_img * 0.6).astype(np.uint8)

    # Draw the keypoints themselves as filled circles on top of the limbs
    for idx_kp, kp in enumerate(kps):
        color = color_list[idx_kp]
        x, y = kp
        out_img = cv2.circle(out_img.copy(), (int(x), int(y)), 10, color, -1)

    out_img_pil = Image.fromarray(out_img.astype(np.uint8))
    return out_img_pil
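

# --- Usage sketch (illustrative only) ---
# A minimal example of how these helpers might be chained together, assuming a
# local image file and 5-point face keypoints in the conventional order
# (left eye, right eye, nose, left mouth corner, right mouth corner).
# The file paths and keypoint values below are hypothetical placeholders,
# not part of the original module.
if __name__ == "__main__":
    img = Image.open("portrait.jpg")        # hypothetical input path
    img = resize_image_to_1mp(img)          # ~1MP, dimensions divisible by 64
    print("Caption:", get_caption(img))     # BLIP caption via Config.CAPTIONER_REPO

    # Example keypoints in (x, y) pixel coordinates; values are made up.
    example_kps = [(300, 400), (500, 400), (400, 500), (330, 620), (470, 620)]
    kps_image = draw_kps(img, example_kps)
    kps_image.save("kps_control.png")       # hypothetical output path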