File size: 2,670 Bytes
b851544
 
 
 
874765f
 
 
b851544
eb833d8
 
 
 
b851544
ca59ddb
eb833d8
b851544
ca59ddb
b851544
 
 
 
 
 
874765f
ed5ed53
 
b851544
eb833d8
ca59ddb
eb833d8
b851544
 
 
eb833d8
b851544
 
 
eb833d8
 
 
b851544
 
 
 
874765f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration
import torch
from config import Config
import cv2
import numpy as np
import math

# Simple global caching for the captioner: both start as None and are
# populated lazily on the first get_caption() call so the BLIP weights are
# only downloaded/loaded when captioning is actually requested.
captioner_processor = None  # BlipProcessor once loaded
captioner_model = None  # BlipForConditionalGeneration once loaded (on Config.DEVICE)

def resize_image_to_1mp(image):
    """Resize *image* to roughly 1 megapixel, preserving aspect ratio.

    The image is first converted to RGB. Target dimensions are derived so
    that ``new_w * new_h ~= 1024 * 1024`` while keeping the original
    aspect ratio, then both are snapped DOWN to multiples of 64 (diffusion
    pipelines commonly require 64-divisible dimensions).

    Args:
        image: a ``PIL.Image.Image`` of any mode and size.

    Returns:
        A new RGB ``PIL.Image.Image`` resized with LANCZOS resampling.
    """
    image = image.convert("RGB")
    w, h = image.size
    target_pixels = 1024 * 1024
    aspect_ratio = w / h

    # Solve new_w * new_h == target_pixels with new_w / new_h == aspect_ratio.
    new_h = int((target_pixels / aspect_ratio) ** 0.5)
    new_w = int(new_h * aspect_ratio)

    # Snap both dimensions down to multiples of 64.
    # (Fixed comment: it previously claimed 48, but the code uses 64.)
    new_w = (new_w // 64) * 64
    new_h = (new_h // 64) * 64

    # Extremely skewed aspect ratios can round a dimension to 0.
    # NOTE(review): this fallback discards the aspect ratio entirely and
    # distorts the image — confirm a square is acceptable for such inputs.
    if new_w == 0 or new_h == 0:
        new_w, new_h = 1024, 1024

    return image.resize((new_w, new_h), Image.LANCZOS)

def get_caption(image):
    """Generate a short BLIP caption for *image*.

    On the first call the BLIP processor and model are loaded from
    ``Config.CAPTIONER_REPO`` and cached in the module-level globals, so
    subsequent calls reuse the same instances.
    """
    global captioner_processor, captioner_model

    if captioner_model is None:
        print("Loading Captioner (BLIP)...")
        captioner_processor = BlipProcessor.from_pretrained(Config.CAPTIONER_REPO)
        captioner_model = (
            BlipForConditionalGeneration
            .from_pretrained(Config.CAPTIONER_REPO)
            .to(Config.DEVICE)
        )

    model_inputs = captioner_processor(image, return_tensors="pt").to(Config.DEVICE)
    generated_ids = captioner_model.generate(**model_inputs)
    return captioner_processor.decode(generated_ids[0], skip_special_tokens=True)

# --- ADDED: Function from your provided file ---
def draw_kps(image_pil, kps, color_list=[(255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0), (255, 0, 255)]):
    """Render five facial keypoints and their connecting limbs on a black canvas.

    Each limb is drawn as a filled ellipse between a keypoint pair, the whole
    limb layer is dimmed to 60% intensity, and then each keypoint is drawn on
    top as a solid circle. The canvas has the same size as *image_pil*.

    Returns the rendered canvas as a PIL image.
    """
    stick_width = 4
    # Keypoint index pairs joined by a limb; index 2 acts as the hub.
    limb_pairs = np.array([[0, 2], [1, 2], [3, 2], [4, 2]])
    points = np.array(kps)

    width, height = image_pil.size
    canvas = np.zeros([height, width, 3])

    for pair in limb_pairs:
        limb_color = color_list[pair[0]]
        xs = points[pair][:, 0]
        ys = points[pair][:, 1]
        limb_len = ((xs[0] - xs[1]) ** 2 + (ys[0] - ys[1]) ** 2) ** 0.5
        rotation = math.degrees(math.atan2(ys[0] - ys[1], xs[0] - xs[1]))
        ellipse = cv2.ellipse2Poly(
            (int(np.mean(xs)), int(np.mean(ys))),
            (int(limb_len / 2), stick_width),
            int(rotation), 0, 360, 1,
        )
        canvas = cv2.fillConvexPoly(canvas.copy(), ellipse, limb_color)
    # Dim the limb layer so the keypoint dots drawn next stand out.
    canvas = (canvas * 0.6).astype(np.uint8)

    for point_idx, point in enumerate(kps):
        px, py = point
        canvas = cv2.circle(canvas.copy(), (int(px), int(py)), 10, color_list[point_idx], -1)

    return Image.fromarray(canvas.astype(np.uint8))
# --- END ADDED ---