import torch
from PIL import Image

from config import Config
from utils import resize_image_to_1mp, get_caption


class Generator:
    def __init__(self, model_handler):
        self.mh = model_handler

    def prepare_control_images(self, image, width, height):
        """Build the depth control map used for ControlNet conditioning."""
        print(f"Generating depth map for {width}x{height}...")
        depth_map_raw = self.mh.leres_detector(image)
        # The detector may return a map at a different resolution; match the working size.
        depth_map = depth_map_raw.resize((width, height), Image.LANCZOS)
        return depth_map

    def predict(
        self,
        input_image,
        user_prompt="",
        negative_prompt="",
        guidance_scale=1.5,
        num_inference_steps=6,
        img2img_strength=0.3,
        depth_strength=0.3,
        seed=-1,
    ):
        print("Processing Input...")
        # Normalize the input to roughly one megapixel before captioning and generation.
        processed_image = resize_image_to_1mp(input_image)
        target_width, target_height = processed_image.size

        # If no prompt is given, fall back to an auto-generated caption.
        if not user_prompt.strip():
            try:
                generated_caption = get_caption(processed_image)
                final_prompt = f"{Config.STYLE_TRIGGER}, {generated_caption}"
            except Exception as e:
                print(f"Captioning failed: {e}, using default prompt.")
                final_prompt = f"{Config.STYLE_TRIGGER}, a beautiful pixel art image"
        else:
            final_prompt = f"{Config.STYLE_TRIGGER}, {user_prompt}"
        print(f"Prompt: {final_prompt}")

        print("Generating Control Map (Depth)...")
        depth_map = self.prepare_control_images(processed_image, target_width, target_height)

        # A seed of -1 (or None) means "pick a random seed"; the chosen value is
        # printed so the run can be reproduced later.
        if seed == -1 or seed is None:
            seed = torch.Generator().seed()
        generator = torch.Generator(device=Config.DEVICE).manual_seed(int(seed))
        print(f"Using seed: {seed}")

        print("Running pipeline...")
        result = self.mh.pipeline(
            prompt=final_prompt,
            negative_prompt=negative_prompt,
            image=processed_image,
            control_image=depth_map,
            generator=generator,
            strength=img2img_strength,
            num_inference_steps=num_inference_steps,
            guidance_scale=guidance_scale,
            controlnet_conditioning_scale=depth_strength,
            clip_skip=Config.CLIP_SKIP,
        ).images[0]

        return result
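

# Usage sketch (illustrative, not part of the module's API): assumes a ModelHandler
# class living in a model_handler module that exposes the .leres_detector and
# .pipeline attributes Generator relies on. The import path, file names, and prompt
# below are hypothetical placeholders.
if __name__ == "__main__":
    from model_handler import ModelHandler  # hypothetical module/class name

    gen = Generator(ModelHandler())
    out = gen.predict(
        Image.open("input.png"),              # illustrative input path
        user_prompt="a cozy cabin in the woods",
        seed=42,                              # fixed seed for a reproducible run
    )
    out.save("output.png")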