alexnasa committed on
Commit
9e1b05d
·
verified ·
1 Parent(s): 3812595

allow frames with no mask

Browse files
Files changed (1) hide show
  1. wan/modules/animate/preprocess/utils.py +244 -225
wan/modules/animate/preprocess/utils.py CHANGED
@@ -1,226 +1,245 @@
1
- # Copyright 2024-2025 The Alibaba Wan Team Authors. All rights reserved.
2
- import os
3
- import cv2
4
- import math
5
- import random
6
- import numpy as np
7
-
8
def get_mask_boxes(mask):
    """Return the tight bounding box of the non-zero pixels of ``mask``.

    Args:
        mask: 2-D array ([h, w]); every non-zero entry counts as foreground.

    Returns:
        ``np.int32`` array ``[x_min, y_min, x_max, y_max]`` (inclusive pixel
        coordinates), or ``None`` when the mask has no foreground pixel.
    """
    y_coords, x_coords = np.nonzero(mask)
    # An all-zero mask yields empty coordinate arrays; .min()/.max() on them
    # raises ValueError, so report "no box" explicitly instead.
    if x_coords.size == 0:
        return None
    bbox = np.array(
        [x_coords.min(), y_coords.min(), x_coords.max(), y_coords.max()]
    ).astype(np.int32)
    return bbox
23
-
24
-
25
def get_aug_mask(body_mask, w_len=10, h_len=20):
    """Coarsen ``body_mask`` by filling every grid cell that touches it.

    The bounding box of the mask is split into roughly ``w_len`` columns and
    ``h_len`` rows; each cell containing foreground is set entirely to 1.
    The mask is modified in place and also returned.

    Args:
        body_mask: 2-D mask array; non-zero entries are foreground.
        w_len: number of grid columns the bbox width is divided into.
        h_len: number of grid rows the bbox height is divided into.

    Returns:
        The same ``body_mask`` array. Frames with an empty mask, or a bbox
        with no extent, are returned unchanged.
    """
    # Frames without any foreground have no bounding box to subdivide.
    if not np.any(body_mask):
        return body_mask

    body_bbox = get_mask_boxes(body_mask)
    if body_bbox is None:
        return body_mask

    x_min, y_min, x_max, y_max = (int(v) for v in body_bbox)

    # A bbox narrower than w_len (or shorter than h_len) would give a step of
    # 0, which range() rejects with ValueError; clamp the step to 1 pixel.
    w_slice = max(1, int((x_max - x_min) / max(1, w_len)))
    h_slice = max(1, int((y_max - y_min) / max(1, h_len)))

    for each_w in range(x_min, x_max, w_slice):
        w_end = min(each_w + w_slice, x_max)
        for each_h in range(y_min, y_max, h_slice):
            h_end = min(each_h + h_slice, y_max)
            if body_mask[each_h:h_end, each_w:w_end].sum() > 0:
                body_mask[each_h:h_end, each_w:w_end] = 1

    return body_mask
43
-
44
def get_mask_body_img(img_copy, hand_mask, k=7, iterations=1):
    """Dilate ``hand_mask`` and blank the dilated region out of ``img_copy``.

    Args:
        img_copy: image array; multiplied by the mask broadcast over a
            trailing axis (assumes an [h, w, c] layout - TODO confirm).
        hand_mask: 2-D mask passed to ``cv2.dilate`` (presumably 0/1 valued,
            since the image is scaled by ``1 - dilated`` - verify with caller).
        k: side length of the square all-ones dilation kernel.
        iterations: number of dilation passes.

    Returns:
        Tuple ``(masked_image, dilated_mask)``.
    """
    structuring_element = np.ones((k, k), np.uint8)
    dilation = cv2.dilate(hand_mask, structuring_element, iterations=iterations)
    # Invert the dilated mask and add a channel axis so it broadcasts.
    keep_weights = 1 - dilation[:, :, None]
    return img_copy * keep_weights, dilation
50
-
51
-
52
def get_face_bboxes(kp2ds, scale, image_shape, ratio_aug):
    """Compute an enlarged face box from rows 23..90 of ``kp2ds``.

    The landmark bounding box is grown so its area is multiplied by ``scale``
    while keeping its aspect ratio. Half of the extra width goes to each side;
    of the extra height, three quarters go above and one quarter below.

    NOTE(review): this module also defines a 3-argument ``get_face_bboxes``
    further down, which rebinds the name at import time - confirm which one
    callers are meant to reach.

    Args:
        kp2ds: keypoint array; columns 0 and 1 of rows 23..90 are read as x, y
            (presumably pixel coordinates - verify against caller).
        scale: area multiplier for the landmark box.
        image_shape: ``(h, w)`` used to clamp the result.
        ratio_aug: when truthy, randomly widens either the horizontal or the
            vertical margin by up to a tenth of the landmark box size.

    Returns:
        ``[x_min, x_max, y_min, y_max]`` as Python ints, clamped to the image.
    """
    h, w = image_shape
    kp2ds_face = kp2ds.copy()[23:91, :2]

    min_x, min_y = np.min(kp2ds_face, axis=0)
    max_x, max_y = np.max(kp2ds_face, axis=0)

    initial_width = max_x - min_x
    initial_height = max_y - min_y

    if initial_width > 0 and initial_height > 0:
        expanded_area = initial_width * initial_height * scale
        new_width = np.sqrt(expanded_area * (initial_width / initial_height))
        new_height = np.sqrt(expanded_area * (initial_height / initial_width))
    else:
        # Degenerate landmark box (all points share an x or a y): the ratio
        # above would be 0/0 and produce NaN, which int() rejects. The same
        # expansion reduces algebraically to scaling each side by sqrt(scale).
        side_scale = np.sqrt(scale)
        new_width = initial_width * side_scale
        new_height = initial_height * side_scale

    delta_width = (new_width - initial_width) / 2
    delta_height = (new_height - initial_height) / 4

    if ratio_aug:
        if random.random() > 0.5:
            delta_width += random.uniform(0, initial_width // 10)
        else:
            delta_height += random.uniform(0, initial_height // 10)

    expanded_min_x = max(min_x - delta_width, 0)
    expanded_max_x = min(max_x + delta_width, w)
    expanded_min_y = max(min_y - 3 * delta_height, 0)
    expanded_max_y = min(max_y + delta_height, h)

    return [int(expanded_min_x), int(expanded_max_x), int(expanded_min_y), int(expanded_max_y)]
85
-
86
-
87
def calculate_new_size(orig_w, orig_h, target_area, divisor=64):
    """Pick the largest size under ``target_area`` aligned to ``divisor``.

    Candidate heights are the multiples of ``divisor`` up to the height an
    image of the original aspect ratio would have at ``target_area`` (rounded
    up). For each height the ideal width is rounded down and up to a multiple
    of ``divisor``. The candidate with the largest area wins; ties go to the
    one closest to the original aspect ratio.

    Args:
        orig_w: original width.
        orig_h: original height.
        target_area: upper bound for ``width * height`` of the result.
        divisor: both returned dimensions are multiples of this value.

    Returns:
        Tuple ``(width, height)`` of Python ints.

    Raises:
        ValueError: if no candidate fits within ``target_area``.
    """
    target_ratio = orig_w / orig_h

    def is_valid(w, h):
        # Positive, within the area budget and aligned to the divisor.
        if w <= 0 or h <= 0:
            return False
        return w * h <= target_area and w % divisor == 0 and h % divisor == 0

    def round_to_multiple(value, round_up=False):
        # Uses the caller's divisor (the previous helper silently fell back
        # to 64 for the width candidates).
        if round_up:
            return divisor * ((value + (divisor - 1)) // divisor)
        return divisor * (value // divisor)

    max_area_h = int(np.sqrt(target_area / target_ratio))
    max_h = round_to_multiple(max_area_h, round_up=True)

    candidates = []
    for h in range(divisor, max_h + divisor, divisor):
        ideal_w = h * target_ratio
        for w in (round_to_multiple(ideal_w), round_to_multiple(ideal_w, round_up=True)):
            # The validity check takes exactly (w, h); passing a third
            # argument made every call fail with TypeError.
            if is_valid(w, h):
                candidates.append((w, h, abs(w / h - target_ratio)))

    if not candidates:
        raise ValueError("Can not find suitable size")

    # Largest area first, then smallest aspect-ratio deviation.
    best_w, best_h, _ = min(candidates, key=lambda c: (-c[0] * c[1], c[2]))
    return int(best_w), int(best_h)
134
-
135
-
136
def resize_by_area(image, target_area, keep_aspect_ratio=True, divisor=64, padding_color=(0, 0, 0)):
    """Resize ``image`` to roughly ``target_area`` pixels, aligned to ``divisor``.

    The target size comes from ``calculate_new_size``. If that fails, a
    fallback size is derived directly from ``target_area`` and rounded down to
    a multiple of ``divisor``. The image is then letterboxed into that size by
    ``padding_resize``.

    Args:
        image: array whose first two dimensions are (height, width).
        target_area: desired ``width * height`` budget.
        keep_aspect_ratio: in the fallback path, keep the source aspect ratio
            (True) or use a square target (False).
        divisor: alignment of the output width and height.
        padding_color: fill colour forwarded to ``padding_resize``.

    Returns:
        The resized (and possibly padded) image from ``padding_resize``.
    """
    h, w = image.shape[:2]
    try:
        new_w, new_h = calculate_new_size(w, h, target_area, divisor)
    except Exception:
        # Best-effort fallback. Catch Exception rather than using a bare
        # ``except`` so KeyboardInterrupt/SystemExit still propagate.
        aspect_ratio = w / h

        if keep_aspect_ratio:
            new_h = math.sqrt(target_area / aspect_ratio)
            new_w = target_area / new_h
        else:
            new_w = new_h = math.sqrt(target_area)

        new_w, new_h = int((new_w // divisor) * divisor), int((new_h // divisor) * divisor)

    # Area interpolation when shrinking, linear otherwise.
    interpolation = cv2.INTER_AREA if (new_w * new_h < w * h) else cv2.INTER_LINEAR

    resized_image = padding_resize(image, height=new_h, width=new_w, padding_color=padding_color,
                                   interpolation=interpolation)
    return resized_image
156
-
157
-
158
def padding_resize(img_ori, height=512, width=512, padding_color=(0, 0, 0), interpolation=cv2.INTER_LINEAR):
    """Letterbox ``img_ori`` into a ``height`` x ``width`` canvas.

    The image is scaled to fit inside the canvas without changing its aspect
    ratio, centred along the axis that has spare room, and the remainder is
    filled with ``padding_color``.

    Args:
        img_ori: 3-D image array (height, width, channels).
        height: canvas height.
        width: canvas width.
        padding_color: per-channel fill values; only the first entry is used
            for single-channel input, otherwise the first three.
        interpolation: OpenCV interpolation flag passed to ``cv2.resize``.

    Returns:
        ``uint8`` array of shape (height, width, channels).
    """
    src_h, src_w, channel = img_ori.shape[0], img_ori.shape[1], img_ori.shape[2]

    canvas = np.zeros((height, width, channel))
    for c in range(1 if channel == 1 else 3):
        canvas[:, :, c] = padding_color[c]

    source_is_taller = (src_h / src_w) > (height / width)
    if source_is_taller:
        # Height is the limiting side: fill it and centre horizontally.
        new_width = int(height / src_h * src_w)
        resized = cv2.resize(img_ori, (new_width, height), interpolation=interpolation)
        if resized.ndim == 2:
            # Restore the channel axis when the resize result comes back 2-D.
            resized = resized[:, :, np.newaxis]
        left = int((width - new_width) / 2)
        canvas[:, left: left + new_width, :] = resized
    else:
        # Width is the limiting side: fill it and centre vertically.
        new_height = int(width / src_w * src_h)
        resized = cv2.resize(img_ori, (width, new_height), interpolation=interpolation)
        if resized.ndim == 2:
            resized = resized[:, :, np.newaxis]
        top = int((height - new_height) / 2)
        canvas[top: top + new_height, :, :] = resized

    return np.uint8(canvas)
189
-
190
-
191
def get_frame_indices(frame_num, video_fps, clip_length, train_fps):
    """Map ``clip_length`` samples taken at ``train_fps`` onto video frames.

    Sample ``i`` is taken at time ``i / train_fps`` and mapped to the nearest
    frame at ``video_fps``. Indices are clamped to ``[0, frame_num - 1]``.

    Args:
        frame_num: number of frames available in the video.
        video_fps: frame rate of the source video.
        clip_length: number of indices to produce.
        train_fps: sampling rate of the clip.

    Returns:
        List of ``clip_length`` integer frame indices.
    """
    sample_times = np.arange(0, clip_length) / train_fps
    nearest_frames = np.round(sample_times * video_fps).astype(int)
    last_valid = frame_num - 1
    return np.clip(nearest_frames, 0, last_valid).tolist()
199
-
200
-
201
def get_face_bboxes(kp2ds, scale, image_shape):
    """Compute an enlarged face box from normalised keypoints.

    All rows of ``kp2ds`` except the first are multiplied by ``(w, h)`` and
    their bounding box is grown so its area is multiplied by ``scale`` while
    keeping its aspect ratio. Half of the extra width goes to each side; of
    the extra height, three quarters go above and one quarter below.

    Args:
        kp2ds: array of (x, y) rows; presumably normalised to [0, 1] since it
            is scaled by the image size - verify against caller.
        scale: area multiplier for the landmark box.
        image_shape: ``(h, w)`` used for scaling and clamping.

    Returns:
        ``[x_min, x_max, y_min, y_max]`` as Python ints, clamped to the image.
    """
    h, w = image_shape
    kp2ds_face = kp2ds.copy()[1:] * (w, h)

    min_x, min_y = np.min(kp2ds_face, axis=0)
    max_x, max_y = np.max(kp2ds_face, axis=0)

    initial_width = max_x - min_x
    initial_height = max_y - min_y

    if initial_width > 0 and initial_height > 0:
        expanded_area = initial_width * initial_height * scale
        new_width = np.sqrt(expanded_area * (initial_width / initial_height))
        new_height = np.sqrt(expanded_area * (initial_height / initial_width))
    else:
        # Degenerate landmark box (all points share an x or a y): the ratio
        # above would be 0/0 and produce NaN, which int() rejects. The same
        # expansion reduces algebraically to scaling each side by sqrt(scale).
        side_scale = np.sqrt(scale)
        new_width = initial_width * side_scale
        new_height = initial_height * side_scale

    delta_width = (new_width - initial_width) / 2
    delta_height = (new_height - initial_height) / 4

    expanded_min_x = max(min_x - delta_width, 0)
    expanded_max_x = min(max_x + delta_width, w)
    expanded_min_y = max(min_y - 3 * delta_height, 0)
    expanded_max_y = min(max_y + delta_height, h)

    return [int(expanded_min_x), int(expanded_max_x), int(expanded_min_y), int(expanded_max_y)]
 
1
+ # Copyright 2024-2025 The Alibaba Wan Team Authors. All rights reserved.
2
+ import os
3
+ import cv2
4
+ import math
5
+ import random
6
+ import numpy as np
7
+
8
def get_mask_boxes(mask):
    """Locate the tight bounding box around the truthy pixels of ``mask``.

    Args:
        mask: [h, w] array (bool, 0/1, or uint8); non-zero means foreground.

    Returns:
        ``np.int32`` array ``[x_min, y_min, x_max, y_max]``, or ``None`` if
        the mask contains no foreground pixel.
    """
    rows, cols = np.nonzero(mask)
    if cols.size == 0 or rows.size == 0:
        return None

    corners = [int(cols.min()), int(rows.min()), int(cols.max()), int(rows.max())]
    return np.array(corners, dtype=np.int32)
23
+
24
+
25
def get_aug_mask(body_mask, w_len=10, h_len=20):
    """Coarsen a body mask by filling every grid cell that touches it.

    The mask's bounding box is divided into roughly ``w_len`` columns and
    ``h_len`` rows, and each cell holding any foreground is set to 1. A mask
    that is not already ``uint8`` is first converted to a 0/1 ``uint8`` copy;
    a ``uint8`` mask is modified in place. Empty masks and bounding boxes with
    no width or height are returned without changes.
    """
    mask = body_mask if body_mask.dtype == np.uint8 else (body_mask > 0).astype(np.uint8)

    bbox = get_mask_boxes(mask)
    if bbox is None:
        # No foreground in this frame.
        return mask

    left, top, right, bottom = (int(v) for v in bbox)
    if right <= left or bottom <= top:
        # Box without extent: nothing to subdivide.
        return mask

    # Steps are clamped to at least one pixel so range() never gets step 0.
    col_step = max(1, int((right - left) / max(1, w_len)))
    row_step = max(1, int((bottom - top) / max(1, h_len)))

    for col in range(left, right, col_step):
        col_end = min(col + col_step, right)
        for row in range(top, bottom, row_step):
            row_end = min(row + row_step, bottom)
            cell = mask[row:row_end, col:col_end]  # view into ``mask``
            if cell.any():
                cell[...] = 1

    return mask
61
+
62
+
63
def get_mask_body_img(img_copy, hand_mask, k=7, iterations=1):
    """Dilate ``hand_mask`` and blank the dilated region out of ``img_copy``.

    Args:
        img_copy: image array; multiplied by the mask broadcast over a
            trailing axis (assumes an [h, w, c] layout - TODO confirm).
        hand_mask: 2-D mask passed to ``cv2.dilate`` (presumably 0/1 valued,
            since the image is scaled by ``1 - dilated`` - verify with caller).
        k: side length of the square all-ones dilation kernel.
        iterations: number of dilation passes.

    Returns:
        Tuple ``(masked_image, dilated_mask)``.
    """
    structuring_element = np.ones((k, k), np.uint8)
    dilation = cv2.dilate(hand_mask, structuring_element, iterations=iterations)
    # Invert the dilated mask and add a channel axis so it broadcasts.
    keep_weights = 1 - dilation[:, :, None]
    return img_copy * keep_weights, dilation
69
+
70
+
71
def get_face_bboxes(kp2ds, scale, image_shape, ratio_aug):
    """Compute an enlarged face box from rows 23..90 of ``kp2ds``.

    The landmark bounding box is grown so its area is multiplied by ``scale``
    while keeping its aspect ratio. Half of the extra width goes to each side;
    of the extra height, three quarters go above and one quarter below.

    NOTE(review): this module also defines a 3-argument ``get_face_bboxes``
    further down, which rebinds the name at import time - confirm which one
    callers are meant to reach.

    Args:
        kp2ds: keypoint array; columns 0 and 1 of rows 23..90 are read as x, y
            (presumably pixel coordinates - verify against caller).
        scale: area multiplier for the landmark box.
        image_shape: ``(h, w)`` used to clamp the result.
        ratio_aug: when truthy, randomly widens either the horizontal or the
            vertical margin by up to a tenth of the landmark box size.

    Returns:
        ``[x_min, x_max, y_min, y_max]`` as Python ints, clamped to the image.
    """
    h, w = image_shape
    kp2ds_face = kp2ds.copy()[23:91, :2]

    min_x, min_y = np.min(kp2ds_face, axis=0)
    max_x, max_y = np.max(kp2ds_face, axis=0)

    initial_width = max_x - min_x
    initial_height = max_y - min_y

    if initial_width > 0 and initial_height > 0:
        expanded_area = initial_width * initial_height * scale
        new_width = np.sqrt(expanded_area * (initial_width / initial_height))
        new_height = np.sqrt(expanded_area * (initial_height / initial_width))
    else:
        # Degenerate landmark box (all points share an x or a y): the ratio
        # above would be 0/0 and produce NaN, which int() rejects. The same
        # expansion reduces algebraically to scaling each side by sqrt(scale).
        side_scale = np.sqrt(scale)
        new_width = initial_width * side_scale
        new_height = initial_height * side_scale

    delta_width = (new_width - initial_width) / 2
    delta_height = (new_height - initial_height) / 4

    if ratio_aug:
        if random.random() > 0.5:
            delta_width += random.uniform(0, initial_width // 10)
        else:
            delta_height += random.uniform(0, initial_height // 10)

    expanded_min_x = max(min_x - delta_width, 0)
    expanded_max_x = min(max_x + delta_width, w)
    expanded_min_y = max(min_y - 3 * delta_height, 0)
    expanded_max_y = min(max_y + delta_height, h)

    return [int(expanded_min_x), int(expanded_max_x), int(expanded_min_y), int(expanded_max_y)]
104
+
105
+
106
def calculate_new_size(orig_w, orig_h, target_area, divisor=64):
    """Pick the largest size under ``target_area`` aligned to ``divisor``.

    Candidate heights are the multiples of ``divisor`` up to the height an
    image of the original aspect ratio would have at ``target_area`` (rounded
    up). For each height the ideal width is rounded down and up to a multiple
    of ``divisor``. The candidate with the largest area wins; ties go to the
    one closest to the original aspect ratio.

    Args:
        orig_w: original width.
        orig_h: original height.
        target_area: upper bound for ``width * height`` of the result.
        divisor: both returned dimensions are multiples of this value.

    Returns:
        Tuple ``(width, height)`` of Python ints.

    Raises:
        ValueError: if no candidate fits within ``target_area``.
    """
    target_ratio = orig_w / orig_h

    def is_valid(w, h):
        # Positive, within the area budget and aligned to the divisor.
        if w <= 0 or h <= 0:
            return False
        return w * h <= target_area and w % divisor == 0 and h % divisor == 0

    def round_to_multiple(value, round_up=False):
        # Uses the caller's divisor (the previous helper silently fell back
        # to 64 for the width candidates).
        if round_up:
            return divisor * ((value + (divisor - 1)) // divisor)
        return divisor * (value // divisor)

    max_area_h = int(np.sqrt(target_area / target_ratio))
    max_h = round_to_multiple(max_area_h, round_up=True)

    candidates = []
    for h in range(divisor, max_h + divisor, divisor):
        ideal_w = h * target_ratio
        for w in (round_to_multiple(ideal_w), round_to_multiple(ideal_w, round_up=True)):
            # The validity check takes exactly (w, h); passing a third
            # argument made every call fail with TypeError.
            if is_valid(w, h):
                candidates.append((w, h, abs(w / h - target_ratio)))

    if not candidates:
        raise ValueError("Can not find suitable size")

    # Largest area first, then smallest aspect-ratio deviation.
    best_w, best_h, _ = min(candidates, key=lambda c: (-c[0] * c[1], c[2]))
    return int(best_w), int(best_h)
153
+
154
+
155
def resize_by_area(image, target_area, keep_aspect_ratio=True, divisor=64, padding_color=(0, 0, 0)):
    """Resize ``image`` to roughly ``target_area`` pixels, aligned to ``divisor``.

    The target size comes from ``calculate_new_size``. If that fails, a
    fallback size is derived directly from ``target_area`` and rounded down to
    a multiple of ``divisor``. The image is then letterboxed into that size by
    ``padding_resize``.

    Args:
        image: array whose first two dimensions are (height, width).
        target_area: desired ``width * height`` budget.
        keep_aspect_ratio: in the fallback path, keep the source aspect ratio
            (True) or use a square target (False).
        divisor: alignment of the output width and height.
        padding_color: fill colour forwarded to ``padding_resize``.

    Returns:
        The resized (and possibly padded) image from ``padding_resize``.
    """
    h, w = image.shape[:2]
    try:
        new_w, new_h = calculate_new_size(w, h, target_area, divisor)
    except Exception:
        # Best-effort fallback. Catch Exception rather than using a bare
        # ``except`` so KeyboardInterrupt/SystemExit still propagate.
        aspect_ratio = w / h

        if keep_aspect_ratio:
            new_h = math.sqrt(target_area / aspect_ratio)
            new_w = target_area / new_h
        else:
            new_w = new_h = math.sqrt(target_area)

        new_w, new_h = int((new_w // divisor) * divisor), int((new_h // divisor) * divisor)

    # Area interpolation when shrinking, linear otherwise.
    interpolation = cv2.INTER_AREA if (new_w * new_h < w * h) else cv2.INTER_LINEAR

    resized_image = padding_resize(image, height=new_h, width=new_w, padding_color=padding_color,
                                   interpolation=interpolation)
    return resized_image
175
+
176
+
177
def padding_resize(img_ori, height=512, width=512, padding_color=(0, 0, 0), interpolation=cv2.INTER_LINEAR):
    """Letterbox ``img_ori`` into a ``height`` x ``width`` canvas.

    The image is scaled to fit inside the canvas without changing its aspect
    ratio, centred along the axis that has spare room, and the remainder is
    filled with ``padding_color``.

    Args:
        img_ori: 3-D image array (height, width, channels).
        height: canvas height.
        width: canvas width.
        padding_color: per-channel fill values; only the first entry is used
            for single-channel input, otherwise the first three.
        interpolation: OpenCV interpolation flag passed to ``cv2.resize``.

    Returns:
        ``uint8`` array of shape (height, width, channels).
    """
    src_h, src_w, channel = img_ori.shape[0], img_ori.shape[1], img_ori.shape[2]

    canvas = np.zeros((height, width, channel))
    for c in range(1 if channel == 1 else 3):
        canvas[:, :, c] = padding_color[c]

    source_is_taller = (src_h / src_w) > (height / width)
    if source_is_taller:
        # Height is the limiting side: fill it and centre horizontally.
        new_width = int(height / src_h * src_w)
        resized = cv2.resize(img_ori, (new_width, height), interpolation=interpolation)
        if resized.ndim == 2:
            # Restore the channel axis when the resize result comes back 2-D.
            resized = resized[:, :, np.newaxis]
        left = int((width - new_width) / 2)
        canvas[:, left: left + new_width, :] = resized
    else:
        # Width is the limiting side: fill it and centre vertically.
        new_height = int(width / src_w * src_h)
        resized = cv2.resize(img_ori, (width, new_height), interpolation=interpolation)
        if resized.ndim == 2:
            resized = resized[:, :, np.newaxis]
        top = int((height - new_height) / 2)
        canvas[top: top + new_height, :, :] = resized

    return np.uint8(canvas)
208
+
209
+
210
def get_frame_indices(frame_num, video_fps, clip_length, train_fps):
    """Map ``clip_length`` samples taken at ``train_fps`` onto video frames.

    Sample ``i`` is taken at time ``i / train_fps`` and mapped to the nearest
    frame at ``video_fps``. Indices are clamped to ``[0, frame_num - 1]``.

    Args:
        frame_num: number of frames available in the video.
        video_fps: frame rate of the source video.
        clip_length: number of indices to produce.
        train_fps: sampling rate of the clip.

    Returns:
        List of ``clip_length`` integer frame indices.
    """
    sample_times = np.arange(0, clip_length) / train_fps
    nearest_frames = np.round(sample_times * video_fps).astype(int)
    last_valid = frame_num - 1
    return np.clip(nearest_frames, 0, last_valid).tolist()
218
+
219
+
220
def get_face_bboxes(kp2ds, scale, image_shape):
    """Compute an enlarged face box from normalised keypoints.

    All rows of ``kp2ds`` except the first are multiplied by ``(w, h)`` and
    their bounding box is grown so its area is multiplied by ``scale`` while
    keeping its aspect ratio. Half of the extra width goes to each side; of
    the extra height, three quarters go above and one quarter below.

    Args:
        kp2ds: array of (x, y) rows; presumably normalised to [0, 1] since it
            is scaled by the image size - verify against caller.
        scale: area multiplier for the landmark box.
        image_shape: ``(h, w)`` used for scaling and clamping.

    Returns:
        ``[x_min, x_max, y_min, y_max]`` as Python ints, clamped to the image.
    """
    h, w = image_shape
    kp2ds_face = kp2ds.copy()[1:] * (w, h)

    min_x, min_y = np.min(kp2ds_face, axis=0)
    max_x, max_y = np.max(kp2ds_face, axis=0)

    initial_width = max_x - min_x
    initial_height = max_y - min_y

    if initial_width > 0 and initial_height > 0:
        expanded_area = initial_width * initial_height * scale
        new_width = np.sqrt(expanded_area * (initial_width / initial_height))
        new_height = np.sqrt(expanded_area * (initial_height / initial_width))
    else:
        # Degenerate landmark box (all points share an x or a y): the ratio
        # above would be 0/0 and produce NaN, which int() rejects. The same
        # expansion reduces algebraically to scaling each side by sqrt(scale).
        side_scale = np.sqrt(scale)
        new_width = initial_width * side_scale
        new_height = initial_height * side_scale

    delta_width = (new_width - initial_width) / 2
    delta_height = (new_height - initial_height) / 4

    expanded_min_x = max(min_x - delta_width, 0)
    expanded_max_x = min(max_x + delta_width, w)
    expanded_min_y = max(min_y - 3 * delta_height, 0)
    expanded_max_y = min(max_y + delta_height, h)

    return [int(expanded_min_x), int(expanded_max_x), int(expanded_min_y), int(expanded_max_y)]