File size: 2,670 Bytes
b851544
 
 
 
874765f
 
 
b851544
eb833d8
 
 
 
b851544
ca59ddb
eb833d8
b851544
ca59ddb
b851544
 
 
 
 
 
874765f
ed5ed53
 
b851544
eb833d8
ca59ddb
eb833d8
b851544
 
 
eb833d8
b851544
 
 
eb833d8
 
 
b851544
 
 
 
874765f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration
import torch
from config import Config
import cv2
import numpy as np
import math

# Simple global caching for the captioner: both start as None and are
# populated lazily on the first get_caption() call so the BLIP weights are
# only downloaded/loaded when captioning is actually requested.
captioner_processor = None  # BlipProcessor once loaded
captioner_model = None  # BlipForConditionalGeneration once loaded (on Config.DEVICE)

def resize_image_to_1mp(image):
    """Resize *image* to roughly 1 megapixel, preserving aspect ratio.

    The image is first converted to RGB. Target dimensions are derived so
    that ``new_w * new_h ~= 1024 * 1024`` while keeping the original
    aspect ratio, then both are snapped DOWN to multiples of 64 (diffusion
    pipelines commonly require 64-divisible dimensions).

    Args:
        image: a ``PIL.Image.Image`` of any mode and size.

    Returns:
        A new RGB ``PIL.Image.Image`` resized with LANCZOS resampling.
    """
    image = image.convert("RGB")
    w, h = image.size
    target_pixels = 1024 * 1024
    aspect_ratio = w / h

    # Solve new_w * new_h == target_pixels with new_w / new_h == aspect_ratio.
    new_h = int((target_pixels / aspect_ratio) ** 0.5)
    new_w = int(new_h * aspect_ratio)

    # Snap both dimensions down to multiples of 64.
    # (Fixed comment: it previously claimed 48, but the code uses 64.)
    new_w = (new_w // 64) * 64
    new_h = (new_h // 64) * 64

    # Extremely skewed aspect ratios can round a dimension to 0.
    # NOTE(review): this fallback discards the aspect ratio entirely and
    # distorts the image — confirm a square is acceptable for such inputs.
    if new_w == 0 or new_h == 0:
        new_w, new_h = 1024, 1024

    return image.resize((new_w, new_h), Image.LANCZOS)

def get_caption(image):
    """Generate a short BLIP caption for *image*.

    On the first call the BLIP processor and model are loaded from
    ``Config.CAPTIONER_REPO`` and cached in the module-level globals, so
    subsequent calls reuse the same instances.
    """
    global captioner_processor, captioner_model

    if captioner_model is None:
        print("Loading Captioner (BLIP)...")
        captioner_processor = BlipProcessor.from_pretrained(Config.CAPTIONER_REPO)
        captioner_model = (
            BlipForConditionalGeneration
            .from_pretrained(Config.CAPTIONER_REPO)
            .to(Config.DEVICE)
        )

    model_inputs = captioner_processor(image, return_tensors="pt").to(Config.DEVICE)
    generated_ids = captioner_model.generate(**model_inputs)
    return captioner_processor.decode(generated_ids[0], skip_special_tokens=True)

# --- ADDED: Function from your provided file ---
def draw_kps(image_pil, kps, color_list=[(255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0), (255, 0, 255)]):
    """Render five facial keypoints and their connecting limbs on a black canvas.

    Each limb is drawn as a filled ellipse between a keypoint pair, the whole
    limb layer is dimmed to 60% intensity, and then each keypoint is drawn on
    top as a solid circle. The canvas has the same size as *image_pil*.

    Returns the rendered canvas as a PIL image.
    """
    stick_width = 4
    # Keypoint index pairs joined by a limb; index 2 acts as the hub.
    limb_pairs = np.array([[0, 2], [1, 2], [3, 2], [4, 2]])
    points = np.array(kps)

    width, height = image_pil.size
    canvas = np.zeros([height, width, 3])

    for pair in limb_pairs:
        limb_color = color_list[pair[0]]
        xs = points[pair][:, 0]
        ys = points[pair][:, 1]
        limb_len = ((xs[0] - xs[1]) ** 2 + (ys[0] - ys[1]) ** 2) ** 0.5
        rotation = math.degrees(math.atan2(ys[0] - ys[1], xs[0] - xs[1]))
        ellipse = cv2.ellipse2Poly(
            (int(np.mean(xs)), int(np.mean(ys))),
            (int(limb_len / 2), stick_width),
            int(rotation), 0, 360, 1,
        )
        canvas = cv2.fillConvexPoly(canvas.copy(), ellipse, limb_color)
    # Dim the limb layer so the keypoint dots drawn next stand out.
    canvas = (canvas * 0.6).astype(np.uint8)

    for point_idx, point in enumerate(kps):
        px, py = point
        canvas = cv2.circle(canvas.copy(), (int(px), int(py)), 10, color_list[point_idx], -1)

    return Image.fromarray(canvas.astype(np.uint8))
# --- END ADDED ---