import math

import cv2
import numpy as np
import torch
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration

from config import Config

# Simple global caching for the captioner
captioner_processor = None
captioner_model = None
def resize_image_to_1mp(image):
    """Resizes an image to roughly 1 MP (e.g., 1024x1024) while preserving aspect ratio."""
    image = image.convert("RGB")
    w, h = image.size
    target_pixels = 1024 * 1024
    aspect_ratio = w / h
    # Solve new_w * new_h ~= target_pixels with new_w / new_h == aspect_ratio
    new_h = int((target_pixels / aspect_ratio) ** 0.5)
    new_w = int(new_h * aspect_ratio)
    # Round both dimensions down to multiples of 64 for model/GPU efficiency
    new_w = (new_w // 64) * 64
    new_h = (new_h // 64) * 64
    if new_w == 0 or new_h == 0:
        new_w, new_h = 1024, 1024  # Fallback for extreme aspect ratios
    return image.resize((new_w, new_h), Image.LANCZOS)
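
# A quick sanity check of the resize math (illustrative only; the input path
# is a placeholder): a 1920x1080 source has aspect ratio 16:9, which gives
# 1365x768 before rounding and 1344x768 after snapping to multiples of 64,
# i.e. about 0.98 MP.
#
#   img = Image.open("example.jpg")   # hypothetical input
#   resized = resize_image_to_1mp(img)
#   print(resized.size)               # (1344, 768) for a 1920x1080 source
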
def get_caption(image):
    """Generates a caption for the image if one isn't provided."""
    global captioner_processor, captioner_model
    # Lazy-load the BLIP captioner on first use and cache it in module globals
    if captioner_model is None:
        print("Loading Captioner (BLIP)...")
        captioner_processor = BlipProcessor.from_pretrained(Config.CAPTIONER_REPO)
        captioner_model = BlipForConditionalGeneration.from_pretrained(Config.CAPTIONER_REPO).to(Config.DEVICE)
    inputs = captioner_processor(image, return_tensors="pt").to(Config.DEVICE)
    out = captioner_model.generate(**inputs)
    caption = captioner_processor.decode(out[0], skip_special_tokens=True)
    return caption
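
# Example usage (a sketch; assumes Config.CAPTIONER_REPO points at a BLIP
# captioning checkpoint such as "Salesforce/blip-image-captioning-base" and
# Config.DEVICE is a valid torch device string like "cuda"):
#
#   img = Image.open("photo.jpg")   # hypothetical input
#   print(get_caption(img))         # e.g. "a dog sitting on a couch"
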
# --- Facial keypoint visualization helper ---
def draw_kps(image_pil, kps, color_list=[(255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0), (255, 0, 255)]):
    """Renders five facial keypoints and their connecting limbs on a black canvas."""
    stickwidth = 4
    # Each pair connects one keypoint to the nose (index 2)
    limbSeq = np.array([[0, 2], [1, 2], [3, 2], [4, 2]])
    kps = np.array(kps)
    w, h = image_pil.size
    out_img = np.zeros([h, w, 3])
    # Draw each limb as a filled ellipse rotated to align with its endpoints
    for i in range(len(limbSeq)):
        index = limbSeq[i]
        color = color_list[index[0]]
        x = kps[index][:, 0]
        y = kps[index][:, 1]
        length = ((x[0] - x[1]) ** 2 + (y[0] - y[1]) ** 2) ** 0.5
        angle = math.degrees(math.atan2(y[0] - y[1], x[0] - x[1]))
        polygon = cv2.ellipse2Poly(
            (int(np.mean(x)), int(np.mean(y))), (int(length / 2), stickwidth), int(angle), 0, 360, 1
        )
        out_img = cv2.fillConvexPoly(out_img.copy(), polygon, color)
    # Dim the limbs so the keypoint dots stand out
    out_img = (out_img * 0.6).astype(np.uint8)
    # Draw each keypoint as a filled circle in its own color
    for idx_kp, kp in enumerate(kps):
        color = color_list[idx_kp]
        x, y = kp
        out_img = cv2.circle(out_img.copy(), (int(x), int(y)), 10, color, -1)
    return Image.fromarray(out_img.astype(np.uint8))
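
# Example usage (a sketch; the keypoint coordinates below are made up and
# would normally come from a face detector such as insightface, in the order
# [left eye, right eye, nose, left mouth corner, right mouth corner]):
#
#   canvas = Image.new("RGB", (640, 480))
#   kps = [(220, 180), (420, 180), (320, 260), (250, 340), (390, 340)]
#   control_image = draw_kps(canvas, kps)
#   control_image.save("kps.png")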