# Modified from https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/flux2/image_processor.py
# Copyright 2025 The Black Forest Labs Team and The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import math
from typing import Tuple

import PIL.Image
from diffusers.configuration_utils import register_to_config
from diffusers.image_processor import VaeImageProcessor


class Flux2ImageProcessor(VaeImageProcessor):
    r"""
    Image processor to preprocess the reference (character) image for the Flux2 model.

    Args:
        do_resize (`bool`, *optional*, defaults to `True`):
            Whether to downscale the image's (height, width) dimensions to multiples of `vae_scale_factor`. Can accept
            `height` and `width` arguments from [`image_processor.VaeImageProcessor.preprocess`] method.
        vae_scale_factor (`int`, *optional*, defaults to `16`):
            VAE (spatial) scale factor. If `do_resize` is `True`, the image is automatically resized to multiples of
            this factor.
        vae_latent_channels (`int`, *optional*, defaults to `32`):
            VAE latent channels.
        do_normalize (`bool`, *optional*, defaults to `True`):
            Whether to normalize the image to [-1,1].
        do_convert_rgb (`bool`, *optional*, defaults to `True`):
            Whether to convert the images to RGB format.
    """

    @register_to_config
    def __init__(
        self,
        do_resize: bool = True,
        vae_scale_factor: int = 16,
        vae_latent_channels: int = 32,
        do_normalize: bool = True,
        do_convert_rgb: bool = True,
    ):
        # All options are forwarded unchanged to the base processor; this subclass
        # only supplies Flux2-specific defaults and the helper methods below.
        super().__init__(
            do_resize=do_resize,
            vae_scale_factor=vae_scale_factor,
            vae_latent_channels=vae_latent_channels,
            do_normalize=do_normalize,
            do_convert_rgb=do_convert_rgb,
        )

    @staticmethod
    def check_image_input(
        image: PIL.Image.Image, max_aspect_ratio: int = 8, min_side_length: int = 64, max_area: int = 1024 * 1024
    ) -> PIL.Image.Image:
        """
        Check if image meets minimum size and aspect ratio requirements.

        Args:
            image: PIL Image to validate
            max_aspect_ratio: Maximum allowed aspect ratio (width/height or height/width)
            min_side_length: Minimum pixels required for width and height
            max_area: Maximum area in pixels². NOTE: this argument is accepted but is not read by any of the
                checks in this method, so images of any area pass validation.

        Returns:
            The input image, unmodified, if valid

        Raises:
            ValueError: If `image` is not a `PIL.Image.Image`, if either side is shorter than `min_side_length`,
                or if the aspect ratio exceeds `max_aspect_ratio`
        """
        if not isinstance(image, PIL.Image.Image):
            raise ValueError(f"Image must be a PIL.Image.Image, got {type(image)}")

        width, height = image.size

        # Check minimum dimensions
        if width < min_side_length or height < min_side_length:
            raise ValueError(
                f"Image too small: {width}×{height}. Both dimensions must be at least {min_side_length}px"
            )

        # Check aspect ratio (orientation-independent: always long side / short side)
        aspect_ratio = max(width / height, height / width)
        if aspect_ratio > max_aspect_ratio:
            raise ValueError(
                f"Aspect ratio too extreme: {width}×{height} (ratio: {aspect_ratio:.1f}:1). "
                f"Maximum allowed ratio is {max_aspect_ratio}:1"
            )

        return image

    @staticmethod
    def _resize_to_target_area(image: PIL.Image.Image, target_area: int = 1024 * 1024) -> PIL.Image.Image:
        """
        Rescale the image so that its pixel area is approximately `target_area`.

        Both sides are multiplied by the same factor `sqrt(target_area / (width * height))`, so the aspect ratio is
        kept up to integer truncation. The image is scaled up or down depending on whether its current area is
        below or above `target_area`.

        Args:
            image: PIL Image to rescale
            target_area: Desired area in pixels² of the returned image

        Returns:
            A new image resampled with LANCZOS to the computed size
        """
        image_width, image_height = image.size

        scale = math.sqrt(target_area / (image_width * image_height))
        # int() truncates, so the resulting area never exceeds target_area because of rounding.
        width = int(image_width * scale)
        height = int(image_height * scale)

        return image.resize((width, height), PIL.Image.Resampling.LANCZOS)

    def _resize_and_crop(
        self,
        image: PIL.Image.Image,
        width: int,
        height: int,
    ) -> PIL.Image.Image:
        r"""
        Center crop the image to the specified width and height. No resampling is performed by this method.

        Args:
            image (`PIL.Image.Image`):
                The image to crop.
            width (`int`):
                The width of the crop box.
            height (`int`):
                The height of the crop box.

        Returns:
            `PIL.Image.Image`:
                The cropped image.
        """
        image_width, image_height = image.size

        # Place the crop box so that it shares its center with the source image.
        left = (image_width - width) // 2
        top = (image_height - height) // 2
        right = left + width
        bottom = top + height

        return image.crop((left, top, right, bottom))