Spaces:

alexnasa
/

Wan2.2-Animate-ZEROGPU

Running on Zero

App Files Files Community

Wan2.2-Animate-ZEROGPU / wan /modules /animate /preprocess /utils.py

alexnasa

allow frames with no mask

9e1b05d verified about 1 month ago

raw

history blame

7.89 kB

	# Copyright 2024-2025 The Alibaba Wan Team Authors. All rights reserved.
	import os
	import cv2
	import math
	import random
	import numpy as np

	def get_mask_boxes(mask):
	    """
	    Compute the tight bounding box around the non-zero entries of a mask.

	    mask: [h, w] array; every non-zero (truthy) entry counts as foreground.

	    Returns an int32 array [x_min, y_min, x_max, y_max], where the max
	    values are the largest foreground indices (inclusive), or None when
	    the mask has no foreground at all.
	    """
	    # np.nonzero yields (row indices, column indices) for a 2-D input.
	    rows, cols = np.nonzero(mask)
	    if rows.size == 0 or cols.size == 0:
	        return None

	    corners = (cols.min(), rows.min(), cols.max(), rows.max())
	    return np.array([int(c) for c in corners], dtype=np.int32)


	def get_aug_mask(body_mask, w_len=10, h_len=20):
	    """
	    Coarsen a mask by filling whole grid cells inside its bounding box.

	    The bounding box of the mask is cut into roughly `w_len` columns and
	    `h_len` rows; every cell that already contains some foreground is set
	    entirely to 1. Empty masks and boxes with no extent are returned as-is.

	    Note: a non-uint8 mask is first binarized into a new uint8 array, while
	    a uint8 mask is used directly and therefore modified in place.
	    """
	    if body_mask.dtype != np.uint8:
	        body_mask = (body_mask > 0).astype(np.uint8)

	    bbox = get_mask_boxes(body_mask)
	    if bbox is None:
	        # No foreground in this frame: nothing to fill.
	        return body_mask

	    left, top, right, bottom = (int(v) for v in bbox)
	    if right <= left or bottom <= top:
	        # Box has no width or no height: leave the mask untouched.
	        return body_mask

	    # Cell sizes are clamped to >= 1 so the range() steps are never zero.
	    cell_w = max(1, int((right - left) / max(1, w_len)))
	    cell_h = max(1, int((bottom - top) / max(1, h_len)))

	    for col0 in range(left, right, cell_w):
	        col1 = min(col0 + cell_w, right)
	        for row0 in range(top, bottom, cell_h):
	            row1 = min(row0 + cell_h, bottom)
	            window = (slice(row0, row1), slice(col0, col1))
	            if body_mask[window].sum() > 0:
	                body_mask[window] = 1

	    return body_mask


	def get_mask_body_img(img_copy, hand_mask, k=7, iterations=1):
	    """
	    Dilate `hand_mask` and blank the dilated region out of `img_copy`.

	    The mask is dilated with a k x k all-ones kernel for `iterations`
	    passes, then the image is multiplied by (1 - dilated mask) broadcast
	    over the channel axis.
	    NOTE(review): presumably `hand_mask` is a 2-D 0/1 uint8 mask and
	    `img_copy` is [h, w, c] - confirm against the callers.

	    Returns (masked image, dilated mask).
	    """
	    structuring = np.ones((k, k), np.uint8)
	    dilated = cv2.dilate(hand_mask, structuring, iterations=iterations)
	    keep = 1 - dilated[:, :, None]
	    return img_copy * keep, dilated


	def get_face_bboxes(kp2ds, scale, image_shape, ratio_aug):
	    """
	    Build an enlarged face box from keypoint rows 23..90 of `kp2ds`.

	    NOTE(review): this module defines `get_face_bboxes` a second time
	    further down (without `ratio_aug`); at import time that later
	    definition replaces this one.

	    Args:
	        kp2ds: keypoint array; rows 23..90, columns 0..1 are read as
	            (x, y) coordinates (presumably face landmarks in pixel units -
	            confirm against the pose estimator's layout).
	        scale: area multiplier for the tight landmark box.
	        image_shape: (h, w) used to clamp the box to the image.
	        ratio_aug: when truthy, randomly widen either the horizontal or
	            the vertical margin by up to a tenth of the box side.

	    Returns:
	        [x_min, x_max, y_min, y_max] as Python ints (note the order).
	    """
	    h, w = image_shape
	    kp2ds_face = np.asarray(kp2ds)[23:91, :2]

	    min_x, min_y = np.min(kp2ds_face, axis=0)
	    max_x, max_y = np.max(kp2ds_face, axis=0)

	    initial_width = max_x - min_x
	    initial_height = max_y - min_y

	    # Scaling the area by `scale` at a fixed aspect ratio scales each side
	    # by sqrt(scale). Computing it this way avoids the width/height
	    # divisions, which produced NaN (and a crash in int()) whenever the
	    # landmarks were collinear and one side of the box was zero.
	    side_scale = np.sqrt(scale)
	    new_width = initial_width * side_scale
	    new_height = initial_height * side_scale

	    delta_width = (new_width - initial_width) / 2
	    # The vertical growth is split in quarters: 3 parts above, 1 below.
	    delta_height = (new_height - initial_height) / 4

	    if ratio_aug:
	        if random.random() > 0.5:
	            delta_width += random.uniform(0, initial_width // 10)
	        else:
	            delta_height += random.uniform(0, initial_height // 10)

	    expanded_min_x = max(min_x - delta_width, 0)
	    expanded_max_x = min(max_x + delta_width, w)
	    expanded_min_y = max(min_y - 3 * delta_height, 0)
	    expanded_max_y = min(max_y + delta_height, h)

	    return [int(expanded_min_x), int(expanded_max_x), int(expanded_min_y), int(expanded_max_y)]


	def calculate_new_size(orig_w, orig_h, target_area, divisor=64):
	    """
	    Pick the largest (w, h) that fits `target_area` on a `divisor` grid.

	    For every height that is a multiple of `divisor` (up to the height an
	    exact-ratio image of `target_area` would have, rounded up), the ideal
	    width for the original aspect ratio is rounded down and up to the
	    grid. Among the candidates whose area does not exceed `target_area`,
	    the one with the largest area wins; ties go to the candidate whose
	    aspect ratio is closest to the original.

	    Args:
	        orig_w, orig_h: original size; both must be positive.
	        target_area: maximum allowed w * h of the result.
	        divisor: both returned sides are multiples of this value.

	    Returns:
	        (best_w, best_h) as Python ints.

	    Raises:
	        ValueError: if a dimension is non-positive or no grid-aligned size
	            fits inside `target_area`.
	    """
	    if orig_w <= 0 or orig_h <= 0:
	        raise ValueError("orig_w and orig_h must be positive")

	    target_ratio = orig_w / orig_h

	    def check_valid(w, h):
	        if w <= 0 or h <= 0:
	            return False
	        return w * h <= target_area and w % divisor == 0 and h % divisor == 0

	    def get_ratio_diff(w, h):
	        return abs(w / h - target_ratio)

	    def round_to_divisor(value, round_up=False):
	        # Always uses the caller's `divisor`; the previous helper silently
	        # fell back to 64 when rounding candidate widths.
	        if round_up:
	            value = value + (divisor - 1)
	        return int(divisor * (value // divisor))

	    max_area_h = int(np.sqrt(target_area / target_ratio))
	    max_h = round_to_divisor(max_area_h, round_up=True)

	    possible_sizes = []
	    for h in range(divisor, max_h + divisor, divisor):
	        ideal_w = h * target_ratio
	        w_down = round_to_divisor(ideal_w)
	        w_up = round_to_divisor(ideal_w, round_up=True)
	        candidates = (w_down,) if w_up == w_down else (w_down, w_up)
	        for w in candidates:
	            # check_valid takes (w, h) only; passing a third argument here
	            # used to raise TypeError on every call.
	            if check_valid(w, h):
	                possible_sizes.append((w, h, get_ratio_diff(w, h)))

	    if not possible_sizes:
	        raise ValueError("Can not find suitable size")

	    # Largest area first, then smallest aspect-ratio error; min() keeps
	    # the earliest candidate on full ties, like a stable sort would.
	    best_w, best_h, _ = min(possible_sizes, key=lambda s: (-s[0] * s[1], s[2]))
	    return int(best_w), int(best_h)


	def resize_by_area(image, target_area, keep_aspect_ratio=True, divisor=64, padding_color=(0, 0, 0)):
	    """
	    Resize `image` to roughly `target_area` pixels on a `divisor` grid.

	    The target size comes from `calculate_new_size`; if that call fails,
	    a fallback size is derived directly from `target_area` (honouring
	    `keep_aspect_ratio`) and floored to multiples of `divisor`. The image
	    is then letterboxed to that size by `padding_resize`.

	    Args:
	        image: array whose first two dimensions are (height, width).
	        target_area: desired output area in pixels.
	        keep_aspect_ratio: only consulted on the fallback path; when False
	            the fallback size is square.
	        divisor: grid both output sides are aligned to.
	        padding_color: fill colour forwarded to `padding_resize`.

	    Returns:
	        The resized, padded image returned by `padding_resize`.
	    """
	    h, w = image.shape[:2]
	    try:
	        new_w, new_h = calculate_new_size(w, h, target_area, divisor)
	    except Exception:
	        # Best-effort fallback. `Exception` rather than a bare `except`
	        # so KeyboardInterrupt/SystemExit are no longer swallowed.
	        aspect_ratio = w / h

	        if keep_aspect_ratio:
	            new_h = math.sqrt(target_area / aspect_ratio)
	            new_w = target_area / new_h
	        else:
	            new_w = new_h = math.sqrt(target_area)

	        # Floor to the grid, but never below one grid cell: a zero-sized
	        # target cannot be allocated or resized into.
	        new_w = max(divisor, int((new_w // divisor) * divisor))
	        new_h = max(divisor, int((new_h // divisor) * divisor))

	    # Area averaging when shrinking, bilinear when enlarging.
	    interpolation = cv2.INTER_AREA if (new_w * new_h < w * h) else cv2.INTER_LINEAR

	    resized_image = padding_resize(image, height=new_h, width=new_w, padding_color=padding_color,
	                                   interpolation=interpolation)
	    return resized_image


	def padding_resize(img_ori, height=512, width=512, padding_color=(0, 0, 0), interpolation=cv2.INTER_LINEAR):
	    """
	    Letterbox `img_ori` into a (height, width) canvas, keeping its aspect ratio.

	    The image is scaled so that it fits entirely inside the canvas, then
	    centred; the uncovered border keeps `padding_color`.

	    Args:
	        img_ori: [h, w, c] image, or a 2-D [h, w] image which is treated
	            as single-channel.
	        height, width: size of the output canvas.
	        padding_color: per-channel fill values; only the first three
	            channels are filled, any further channel stays 0.
	        interpolation: OpenCV interpolation flag passed to `cv2.resize`.

	    Returns:
	        uint8 array of shape (height, width, c).
	    """
	    ori_height, ori_width = img_ori.shape[:2]
	    # A 2-D input has no channel axis; indexing shape[2] used to raise.
	    channel = img_ori.shape[2] if img_ori.ndim > 2 else 1

	    # The canvas is float64 (np.zeros default) and cast to uint8 at the end.
	    img_pad = np.zeros((height, width, channel))
	    for c in range(min(channel, 3)):
	        img_pad[:, :, c] = padding_color[c]

	    if (ori_height / ori_width) > (height / width):
	        # Source is relatively taller than the canvas: fit the height and
	        # pad left/right. Clamp to 1 px so cv2.resize never gets a 0 size.
	        new_width = max(1, int(height / ori_height * ori_width))
	        img = cv2.resize(img_ori, (new_width, height), interpolation=interpolation)
	        padding = int((width - new_width) / 2)
	        if len(img.shape) == 2:
	            # cv2.resize returns a 2-D array for single-channel images.
	            img = img[:, :, np.newaxis]
	        img_pad[:, padding: padding + new_width, :] = img
	    else:
	        # Otherwise fit the width and pad top/bottom.
	        new_height = max(1, int(width / ori_width * ori_height))
	        img = cv2.resize(img_ori, (width, new_height), interpolation=interpolation)
	        padding = int((height - new_height) / 2)
	        if len(img.shape) == 2:
	            img = img[:, :, np.newaxis]
	        img_pad[padding: padding + new_height, :, :] = img

	    img_pad = np.uint8(img_pad)

	    return img_pad


	def get_frame_indices(frame_num, video_fps, clip_length, train_fps):
	    """
	    Map `clip_length` samples taken at `train_fps` onto source frame indices.

	    Sample i sits at time i / train_fps; that time is converted to the
	    nearest frame of a `video_fps` video, starting from frame 0, and
	    clamped to the range [0, frame_num - 1].

	    Returns a list of Python ints of length `clip_length`.
	    """
	    sample_times = np.arange(0, clip_length) / train_fps
	    nearest_frames = np.round(sample_times * video_fps).astype(int)
	    # Samples past the end of the video repeat the last frame.
	    return np.clip(nearest_frames, 0, frame_num - 1).tolist()


	def get_face_bboxes(kp2ds, scale, image_shape):
	    """
	    Build an enlarged face box from normalized keypoints.

	    NOTE(review): an earlier definition of `get_face_bboxes` in this
	    module takes an extra `ratio_aug` argument; this later definition is
	    the one bound to the name after import.

	    Args:
	        kp2ds: (N, 2) keypoint array; row 0 is skipped and the remaining
	            rows are multiplied by (w, h) (so they are presumably
	            normalized to [0, 1] - confirm against the caller).
	        scale: area multiplier for the tight keypoint box.
	        image_shape: (h, w) used for scaling and for clamping the box.

	    Returns:
	        [x_min, x_max, y_min, y_max] as Python ints (note the order).
	    """
	    h, w = image_shape
	    kp2ds_face = np.asarray(kp2ds)[1:] * (w, h)

	    min_x, min_y = np.min(kp2ds_face, axis=0)
	    max_x, max_y = np.max(kp2ds_face, axis=0)

	    initial_width = max_x - min_x
	    initial_height = max_y - min_y

	    # Scaling the area by `scale` at a fixed aspect ratio scales each side
	    # by sqrt(scale). Computing it this way avoids the width/height
	    # divisions, which produced NaN (and a crash in int()) whenever the
	    # keypoints were collinear and one side of the box was zero.
	    side_scale = np.sqrt(scale)
	    new_width = initial_width * side_scale
	    new_height = initial_height * side_scale

	    delta_width = (new_width - initial_width) / 2
	    # The vertical growth is split in quarters: 3 parts above, 1 below.
	    delta_height = (new_height - initial_height) / 4

	    expanded_min_x = max(min_x - delta_width, 0)
	    expanded_max_x = min(max_x + delta_width, w)
	    expanded_min_y = max(min_y - 3 * delta_height, 0)
	    expanded_max_y = min(max_y + delta_height, h)

	    return [int(expanded_min_x), int(expanded_max_x), int(expanded_min_y), int(expanded_max_y)]