Spaces:

primerz
/

face-to-pixel-art

Running on Zero

App Files Files Community

primerz committed 29 days ago

Commit

60bf1c5

verified ·

1 Parent(s): 5a9aef6

Update generator.py

Browse files

Files changed (1) hide show

generator.py +34 -6

generator.py CHANGED Viewed

@@ -1,22 +1,45 @@
 import torch
 from config import Config
 from utils import resize_image_to_1mp, get_caption
 class Generator:
     def __init__(self, model_handler):
         self.mh = model_handler
-    def prepare_control_images(self, image):
-        """Generates the conditioning maps from the input image."""
-        depth_map = self.mh.zoe_detector(image)
-        lineart_map = self.mh.lineart_detector(image)
         return depth_map, lineart_map
     def predict(self, input_image, user_prompt=""):
         # 1. Pre-process Inputs
         print("Processing Input...")
         processed_image = resize_image_to_1mp(input_image)
         # 2. Get Face Embedding (Robust Mode)
         face_emb = self.mh.get_face_embedding(processed_image)
@@ -35,7 +58,10 @@ class Generator:
         # 4. Generate Control Maps (Structure)
         print("Generating Control Maps (Depth, LineArt)...")
-        depth_map, lineart_map = self.prepare_control_images(processed_image)
         # 5. Logic for Face vs No-Face
         # ControlNet order: [InstantID, Zoe, LineArt]
@@ -55,7 +81,10 @@ class Generator:
         result = self.mh.pipeline(
             prompt=final_prompt,
             image=processed_image,  # <-- Base image for Img2Img
             control_image=[processed_image, depth_map, lineart_map], # <-- ControlNet inputs
             image_embeds=face_emb,  # <-- Face embedding for InstantID
             strength=0.85, # Img2Img strength (0.8-0.9 is good for style)
@@ -66,7 +95,6 @@ class Generator:
             num_inference_steps=8,
             guidance_scale=1.5,
-            # --- ADDED ---
             clip_skip=2
         ).images[0]

 import torch
 from config import Config
 from utils import resize_image_to_1mp, get_caption
+from PIL import Image # <-- Make sure this import is at the top
 class Generator:
     def __init__(self, model_handler):
         self.mh = model_handler
+    # --- START FIX ---
+    def prepare_control_images(self, image, width, height):
+        """
+        Generates conditioning maps, ensuring they are resized
+        to the exact target dimensions (width, height).
+        """
+        print(f"Generating control maps for {width}x{height}...")
+        # Generate depth map
+        # The detector might return a different size (e.g., 512x512)
+        depth_map_raw = self.mh.zoe_detector(image)
+        # Generate lineart map
+        lineart_map_raw = self.mh.lineart_detector(image)
+        # Manually resize maps to match the exact output resolution
+        # This prevents the tensor mismatch error.
+        depth_map = depth_map_raw.resize((width, height), Image.LANCZOS)
+        lineart_map = lineart_map_raw.resize((width, height), Image.LANCZOS)
         return depth_map, lineart_map
+    # --- END FIX ---
     def predict(self, input_image, user_prompt=""):
         # 1. Pre-process Inputs
         print("Processing Input...")
         processed_image = resize_image_to_1mp(input_image)
+        # --- START FIX ---
+        # Get the exact dimensions for the control maps
+        target_width, target_height = processed_image.size
+        # --- END FIX ---
         # 2. Get Face Embedding (Robust Mode)
         face_emb = self.mh.get_face_embedding(processed_image)
         # 4. Generate Control Maps (Structure)
         print("Generating Control Maps (Depth, LineArt)...")
+        # --- START FIX ---
+        # Pass target dimensions to the preprocessor
+        depth_map, lineart_map = self.prepare_control_images(processed_image, target_width, target_height)
+        # --- END FIX ---
         # 5. Logic for Face vs No-Face
         # ControlNet order: [InstantID, Zoe, LineArt]
         result = self.mh.pipeline(
             prompt=final_prompt,
             image=processed_image,  # <-- Base image for Img2Img
+            # All 3 images are now guaranteed to be the same size
             control_image=[processed_image, depth_map, lineart_map], # <-- ControlNet inputs
             image_embeds=face_emb,  # <-- Face embedding for InstantID
             strength=0.85, # Img2Img strength (0.8-0.9 is good for style)
             num_inference_steps=8,
             guidance_scale=1.5,
             clip_skip=2
         ).images[0]