# ZIT-Controlnet / videox_fun / models / flux2_image_processor.py
# Alexander Bagus · 22 · be751d2 · raw · history blame · 5.14 kB
# Modified from https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/flux2/image_processor.py
# Copyright 2025 The Black Forest Labs Team and The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math
from typing import Tuple
import PIL.Image
from diffusers.configuration_utils import register_to_config
from diffusers.image_processor import VaeImageProcessor
class Flux2ImageProcessor(VaeImageProcessor):
    r"""
    Image processor to preprocess the reference (character) image for the Flux2 model.

    Args:
        do_resize (`bool`, *optional*, defaults to `True`):
            Whether to downscale the image's (height, width) dimensions to multiples of `vae_scale_factor`. Can accept
            `height` and `width` arguments from [`image_processor.VaeImageProcessor.preprocess`] method.
        vae_scale_factor (`int`, *optional*, defaults to `16`):
            VAE (spatial) scale factor. If `do_resize` is `True`, the image is automatically resized to multiples of
            this factor.
        vae_latent_channels (`int`, *optional*, defaults to `32`):
            VAE latent channels.
        do_normalize (`bool`, *optional*, defaults to `True`):
            Whether to normalize the image to [-1,1].
        do_convert_rgb (`bool`, *optional*, defaults to `True`):
            Whether to convert the images to RGB format.
    """

    @register_to_config
    def __init__(
        self,
        do_resize: bool = True,
        vae_scale_factor: int = 16,
        vae_latent_channels: int = 32,
        do_normalize: bool = True,
        do_convert_rgb: bool = True,
    ):
        # All options are forwarded unchanged; this subclass only overrides the default values.
        super().__init__(
            do_resize=do_resize,
            vae_scale_factor=vae_scale_factor,
            vae_latent_channels=vae_latent_channels,
            do_normalize=do_normalize,
            do_convert_rgb=do_convert_rgb,
        )

    @staticmethod
    def check_image_input(
        image: PIL.Image.Image, max_aspect_ratio: int = 8, min_side_length: int = 64, max_area: int = 1024 * 1024
    ) -> PIL.Image.Image:
        """
        Check if image meets minimum size and aspect ratio requirements.

        Args:
            image: PIL Image to validate
            max_aspect_ratio: Maximum allowed aspect ratio (width/height or height/width)
            min_side_length: Minimum pixels required for width and height
            max_area: Accepted for interface compatibility but NOT enforced by this method; an image whose area
                exceeds this value still passes validation.

        Returns:
            The input image, unmodified, if valid

        Raises:
            ValueError: If `image` is not a `PIL.Image.Image`, if either side is shorter than `min_side_length`,
                or if the aspect ratio exceeds `max_aspect_ratio`
        """
        if not isinstance(image, PIL.Image.Image):
            raise ValueError(f"Image must be a PIL.Image.Image, got {type(image)}")

        width, height = image.size

        # Check minimum dimensions
        if width < min_side_length or height < min_side_length:
            raise ValueError(
                f"Image too small: {width}×{height}. Both dimensions must be at least {min_side_length}px"
            )

        # Check aspect ratio (orientation-independent: always long side over short side)
        aspect_ratio = max(width / height, height / width)
        if aspect_ratio > max_aspect_ratio:
            raise ValueError(
                f"Aspect ratio too extreme: {width}×{height} (ratio: {aspect_ratio:.1f}:1). "
                f"Maximum allowed ratio is {max_aspect_ratio}:1"
            )

        return image

    @staticmethod
    def _resize_to_target_area(image: PIL.Image.Image, target_area: int = 1024 * 1024) -> PIL.Image.Image:
        """
        Rescale the image so that its area is approximately `target_area`, keeping its aspect ratio.

        The same scale factor is applied to both sides, so the image is enlarged when its area is below
        `target_area` and shrunk when it is above.

        Args:
            image: PIL Image to rescale
            target_area: Desired area of the output in pixels²

        Returns:
            A new image resampled with the LANCZOS filter
        """
        image_width, image_height = image.size
        scale = math.sqrt(target_area / (image_width * image_height))

        # Each side is rounded down; clamp to one pixel so that a very elongated input never hands a
        # zero-sized dimension to `resize`.
        width = max(1, int(image_width * scale))
        height = max(1, int(image_height * scale))

        return image.resize((width, height), PIL.Image.Resampling.LANCZOS)

    def _resize_and_crop(
        self,
        image: PIL.Image.Image,
        width: int,
        height: int,
    ) -> PIL.Image.Image:
        r"""
        Center crop the image to the specified width and height. Despite the method name, no resizing is performed.

        Args:
            image (`PIL.Image.Image`):
                The image to crop.
            width (`int`):
                The width of the crop box.
            height (`int`):
                The height of the crop box.

        Returns:
            `PIL.Image.Image`:
                The center-cropped image.
        """
        image_width, image_height = image.size

        # Center the crop box; `left`/`top` become negative when the requested size exceeds the image.
        left = (image_width - width) // 2
        top = (image_height - height) // 2
        right = left + width
        bottom = top + height

        return image.crop((left, top, right, bottom))