Spaces:

primerz
/

face-to-pixel-art

Running on Zero

App Files Files Community

primerz committed 29 days ago

Commit

a910636

verified ·

1 Parent(s): ff641c2

Update generator.py

Browse files

Files changed (1) hide show

generator.py +17 -22

generator.py CHANGED Viewed

@@ -1,13 +1,12 @@
 import torch
 from config import Config
 from utils import resize_image_to_1mp, get_caption
-from PIL import Image # <-- Make sure this import is at the top
 class Generator:
     def __init__(self, model_handler):
         self.mh = model_handler
-    # --- START FIX ---
     def prepare_control_images(self, image, width, height):
         """
         Generates conditioning maps, ensuring they are resized
@@ -16,29 +15,22 @@ class Generator:
         print(f"Generating control maps for {width}x{height}...")
         # Generate depth map
-        # The detector might return a different size (e.g., 512x512)
         depth_map_raw = self.mh.zoe_detector(image)
         # Generate lineart map
         lineart_map_raw = self.mh.lineart_detector(image)
         # Manually resize maps to match the exact output resolution
-        # This prevents the tensor mismatch error.
         depth_map = depth_map_raw.resize((width, height), Image.LANCZOS)
         lineart_map = lineart_map_raw.resize((width, height), Image.LANCZOS)
         return depth_map, lineart_map
-    # --- END FIX ---
     def predict(self, input_image, user_prompt=""):
         # 1. Pre-process Inputs
         print("Processing Input...")
         processed_image = resize_image_to_1mp(input_image)
-        # --- START FIX ---
-        # Get the exact dimensions for the control maps
         target_width, target_height = processed_image.size
-        # --- END FIX ---
         # 2. Get Face Embedding (Robust Mode)
         face_emb = self.mh.get_face_embedding(processed_image)
@@ -58,10 +50,7 @@ class Generator:
         # 4. Generate Control Maps (Structure)
         print("Generating Control Maps (Depth, LineArt)...")
-        # --- START FIX ---
-        # Pass target dimensions to the preprocessor
         depth_map, lineart_map = self.prepare_control_images(processed_image, target_width, target_height)
-        # --- END FIX ---
         # 5. Logic for Face vs No-Face
         # ControlNet order: [InstantID, Zoe, LineArt]
@@ -73,29 +62,35 @@ class Generator:
         else:
             print("No face detected: Disabling InstantID.")
             controlnet_conditioning_scale = [0.0, 0.4, 0.4] # Disable InstantID weight
-            control_guidance_end = [0.5, 0.8, 0.8] # Set end to avoid 0.0 >= 0.0 error
             self.mh.pipeline.set_ip_adapter_scale(0.0)
         # 6. Run Inference
         print("Running pipeline...")
         result = self.mh.pipeline(
             prompt=final_prompt,
-            image=processed_image,  # <-- Base image for Img2Img
-            # All 3 images are now guaranteed to be the same size
-            control_image=[processed_image, depth_map, lineart_map], # <-- ControlNet inputs
-            image_embeds=face_emb,  # <-- Face embedding for InstantID
-            strength=0.85, # Img2Img strength (0.8-0.9 is good for style)
             controlnet_conditioning_scale=controlnet_conditioning_scale,
             control_guidance_end=control_guidance_end,
             # LCM settings
             num_inference_steps=8,
-            guidance_scale=1.5,
-            clip_skip=2
         ).images[0]

 import torch
 from config import Config
 from utils import resize_image_to_1mp, get_caption
+from PIL import Image
 class Generator:
     def __init__(self, model_handler):
         self.mh = model_handler
     def prepare_control_images(self, image, width, height):
         """
         Generates conditioning maps, ensuring they are resized
         print(f"Generating control maps for {width}x{height}...")
         # Generate depth map
         depth_map_raw = self.mh.zoe_detector(image)
         # Generate lineart map
         lineart_map_raw = self.mh.lineart_detector(image)
         # Manually resize maps to match the exact output resolution
         depth_map = depth_map_raw.resize((width, height), Image.LANCZOS)
         lineart_map = lineart_map_raw.resize((width, height), Image.LANCZOS)
         return depth_map, lineart_map
     def predict(self, input_image, user_prompt=""):
         # 1. Pre-process Inputs
         print("Processing Input...")
         processed_image = resize_image_to_1mp(input_image)
         target_width, target_height = processed_image.size
         # 2. Get Face Embedding (Robust Mode)
         face_emb = self.mh.get_face_embedding(processed_image)
         # 4. Generate Control Maps (Structure)
         print("Generating Control Maps (Depth, LineArt)...")
         depth_map, lineart_map = self.prepare_control_images(processed_image, target_width, target_height)
         # 5. Logic for Face vs No-Face
         # ControlNet order: [InstantID, Zoe, LineArt]
         else:
             print("No face detected: Disabling InstantID.")
             controlnet_conditioning_scale = [0.0, 0.4, 0.4] # Disable InstantID weight
+            control_guidance_end = [0.5, 0.8, 0.8]
             self.mh.pipeline.set_ip_adapter_scale(0.0)
+            # --- START FIX for NoneType Error ---
+            # Create a dummy tensor instead of passing None
+            # Shape is (batch_size, embedding_dim)
+            face_emb = torch.zeros((1, 512), dtype=Config.DTYPE, device=Config.DEVICE)
+            # --- END FIX ---
         # 6. Run Inference
         print("Running pipeline...")
         result = self.mh.pipeline(
             prompt=final_prompt,
+            image=processed_image,  # Base image for Img2Img
+            control_image=[processed_image, depth_map, lineart_map], # ControlNet inputs
+            image_embeds=face_emb,  # Face embedding (or dummy)
+            strength=0.666, # <-- Img2Img strength
             controlnet_conditioning_scale=controlnet_conditioning_scale,
             control_guidance_end=control_guidance_end,
             # LCM settings
             num_inference_steps=8,
+            guidance_scale=1.75, # <-- CFG Scale
+            clip_skip=2,
+            # --- LoRA Strength ---
+            cross_attention_kwargs={"scale": 1.333}
         ).images[0]