alexnasa committed on
Commit 7e9578f · verified · 1 Parent(s): 615b11b

Update app.py

Files changed (1): app.py +545 -545
app.py CHANGED
@@ -1,546 +1,546 @@
import spaces
from huggingface_hub import snapshot_download, hf_hub_download
import os
import subprocess
import importlib, site
from PIL import Image
import uuid
import shutil
import time
import cv2
from generate import generate, load_model
import json

# Re-discover all .pth/.egg-link files
for sitedir in site.getsitepackages():
    site.addsitedir(sitedir)

# Clear caches so importlib will pick up new modules
importlib.invalidate_caches()

def sh(cmd): subprocess.check_call(cmd, shell=True)

try:
    print("Attempting to download and build sam2...")

    print("download sam")
    sam_dir = snapshot_download(repo_id="alexnasa/sam2")

    @spaces.GPU(duration=450)
    def install_sam():
        os.environ["TORCH_CUDA_ARCH_LIST"] = "9.0"
        sh(f"cd {sam_dir} && python setup.py build_ext --inplace && pip install -e .")

    print("install sam")
    install_sam()

    # Tell Python to re-scan site-packages now that the egg-link exists
    site.addsitedir(site.getsitepackages()[0])
    importlib.invalidate_caches()

    sam2_installed = True
    print("sam2 installed successfully.")

except Exception as e:
    print(f"⚠️ Could not install sam2: {e}")
    print("Continuing without sam2...")

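# Note: on ZeroGPU Spaces the main process has no GPU attached, which is why
# the CUDA extension build above is wrapped in a @spaces.GPU function and run
# on a GPU worker; TORCH_CUDA_ARCH_LIST="9.0" targets Hopper-class hardware
# (compute capability 9.0).
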
import torch
print(f"Torch version: {torch.__version__}")

os.environ["PROCESSED_RESULTS"] = f"{os.getcwd()}/processed_results"

import gradio as gr


snapshot_download(repo_id="Wan-AI/Wan2.2-Animate-14B", local_dir="./Wan2.2-Animate-14B")
wan_animate = load_model(True)


rc_mapping = {
    "Video → Ref Image": False,
    "Video ← Ref Image": True
}

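# The mode names map to the preprocess flags used in _animate below:
#   False -> --retarget_flag: the ref-image character is animated with the
#            motion of the input video.
#   True  -> --replace_flag: the character in the input video is replaced
#            by the ref-image character.
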
def preprocess_video(input_video_path, session_id=None):

    if session_id is None:
        session_id = uuid.uuid4().hex

    output_dir = os.path.join(os.environ["PROCESSED_RESULTS"], session_id)
    os.makedirs(output_dir, exist_ok=True)

    process_video_path = os.path.join(output_dir, 'input_video.mp4')

    convert_video_to_30fps_and_clip(input_video_path, process_video_path, crop_width=720, crop_height=1280)

    return process_video_path

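# Uploads are re-encoded to 30 fps, clipped to the 2 s default duration and
# center-cropped to at most 720x1280 before the time estimate and generation
# run on them.
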
def extract_audio_from_video_ffmpeg(video_path, output_wav_path, sample_rate=None):
    """
    Extracts the audio track from a video file and saves it as a WAV file.

    Args:
        video_path (str): Path to the input video file.
        output_wav_path (str): Path to save the extracted WAV file.
        sample_rate (int, optional): Output sample rate (e.g., 16000).
            If None, keep the original.
    """
    cmd = [
        'ffmpeg',
        '-i', video_path,          # Input video
        '-vn',                     # Disable video
        '-acodec', 'pcm_s16le',    # 16-bit PCM (WAV format)
        '-ac', '1',                # Mono channel (use '2' for stereo)
        '-y',                      # Overwrite output
        '-loglevel', 'error'       # Cleaner output
    ]

    # Only add the sample rate option if explicitly specified
    if sample_rate is not None:
        cmd.extend(['-ar', str(sample_rate)])

    cmd.append(output_wav_path)

    try:
        subprocess.run(cmd, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        raise RuntimeError(f"ffmpeg failed ({e.returncode}): {e.stderr.strip()}")

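# Equivalent command line (original sample rate kept):
#   ffmpeg -i in.mp4 -vn -acodec pcm_s16le -ac 1 -y -loglevel error out.wav
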
def combine_video_and_audio_ffmpeg(video_path, audio_path, output_video_path):
    """
    Combines a silent MP4 video with a WAV audio file into a single MP4 with sound.

    Args:
        video_path (str): Path to the silent video file.
        audio_path (str): Path to the WAV audio file.
        output_video_path (str): Path to save the output MP4 with audio.
    """
    cmd = [
        'ffmpeg',
        '-i', video_path,      # Input video
        '-i', audio_path,      # Input audio
        '-c:v', 'copy',        # Copy video without re-encoding
        '-c:a', 'aac',         # Encode audio as AAC (MP4-compatible)
        '-shortest',           # Stop when the shortest stream ends
        '-y',                  # Overwrite output
        '-loglevel', 'error',
        output_video_path
    ]

    try:
        subprocess.run(cmd, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        raise RuntimeError(f"ffmpeg failed ({e.returncode}): {e.stderr.strip()}")

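# Equivalent command line:
#   ffmpeg -i silent.mp4 -i audio.wav -c:v copy -c:a aac -shortest -y -loglevel error out.mp4
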
def convert_video_to_30fps_and_clip(
    input_video_path,
    output_video_path,
    duration_s=2,
    target_fps=30,
    crop_width=None,
    crop_height=None
):
    # Get input video dimensions using ffprobe
    if crop_width and crop_height:
        probe_cmd = [
            'ffprobe', '-v', 'error', '-select_streams', 'v:0',
            '-show_entries', 'stream=width,height',
            '-of', 'json', input_video_path
        ]
        probe_result = subprocess.run(probe_cmd, capture_output=True, text=True, check=True)
        video_info = json.loads(probe_result.stdout)
        w = video_info['streams'][0]['width']
        h = video_info['streams'][0]['height']

        # Clamp crop size to not exceed actual dimensions
        crop_width = min(crop_width, w)
        crop_height = min(crop_height, h)

        # Center crop offsets
        crop_x = max((w - crop_width) // 2, 0)
        crop_y = max((h - crop_height) // 2, 0)
        crop_filter = f"crop={crop_width}:{crop_height}:{crop_x}:{crop_y}"
    else:
        crop_filter = None

    cmd = [
        'ffmpeg',
        '-i', input_video_path,
        '-r', str(target_fps),
        '-t', str(duration_s),
    ]

    if crop_filter:
        cmd += ['-vf', crop_filter]

    cmd += [
        '-c:v', 'libx264',
        '-c:a', 'aac',
        '-strict', 'experimental',
        '-y',
        '-loglevel', 'error',
        output_video_path
    ]

    try:
        subprocess.run(cmd, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        raise RuntimeError(f"ffmpeg failed ({e.returncode}): {e.stderr.strip()}")

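# Worked example (hypothetical 1920x1080 input with crop_width=720,
# crop_height=1280): the crop height is clamped to 1080, the offsets are
# x = (1920 - 720) // 2 = 600 and y = 0, giving "crop=720:1080:600:0".
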
def get_frames_count(video_file):

    # Get video information
    cap = cv2.VideoCapture(video_file)
    if not cap.isOpened():
        gr.Warning("Cannot open video file")
        return 0

    orig_frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    cap.release()

    return orig_frame_count

def calculate_time_required(input_video, rc_bool):

    frames_count = get_frames_count(input_video)

    chunks = frames_count // 77 + 1

    if rc_bool:
        pose2d_tracking_duration_s = 75
        iteration_per_step_s = 13
    else:
        pose2d_tracking_duration_s = 50
        iteration_per_step_s = 12

    time_required = pose2d_tracking_duration_s + iteration_per_step_s * 20 * chunks
    print(f'frames_count={frames_count}: {chunks} chunk(s), estimated time {time_required}s')
    return time_required

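# Worked example (hypothetical 150-frame clip): chunks = 150 // 77 + 1 = 2,
# so replace mode estimates 75 + 13 * 20 * 2 = 595 s and retarget mode
# 50 + 12 * 20 * 2 = 530 s; the factor of 20 is the per-chunk step count
# this estimate assumes.
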
def update_time_required(input_video, rc_str):

    if input_video is None:
        return gr.update(value="⌚ Zero GPU Required: --")

    rc_bool = rc_mapping[rc_str]

    duration_s = calculate_time_required(input_video, rc_bool)
    duration_m = duration_s / 60

    return gr.update(value=f"⌚ Zero GPU Required: ~{duration_s}s ({duration_m:.1f} mins)")

def get_duration(input_video, edited_frame, rc_bool, session_id, progress):

    return calculate_time_required(input_video, rc_bool)

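# spaces.GPU accepts a callable for `duration`: get_duration is invoked with
# the same arguments as _animate, so the ZeroGPU slot is sized per request.
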

@spaces.GPU(duration=get_duration)
def _animate(input_video, edited_frame, rc_bool, session_id=None, progress=gr.Progress(track_tqdm=True)):

    if session_id is None:
        session_id = uuid.uuid4().hex

    output_dir = os.path.join(os.environ["PROCESSED_RESULTS"], session_id)
    os.makedirs(output_dir, exist_ok=True)

    preprocess_dir = os.path.join(output_dir, "preprocess_dir")
    os.makedirs(preprocess_dir, exist_ok=True)

    output_video_path = os.path.join(output_dir, 'result.mp4')

    # --- Measure preprocess time ---
    start_preprocess = time.time()

    # Other resolution presets tried: 720x480, 720x1280, 480x720
    w = 480
    h = 832

    tag_string = "retarget_flag"

    if rc_bool:
        tag_string = "replace_flag"

    sh("python ./wan/modules/animate/preprocess/preprocess_data.py "
       "--ckpt_path ./Wan2.2-Animate-14B/process_checkpoint "
       f"--video_path {input_video} "
       f"--refer_path {edited_frame} "
       f"--save_path {preprocess_dir} "
       f"--resolution_area {w} {h} --{tag_string} "
       )

    preprocess_time = time.time() - start_preprocess
    print(f"Preprocess took {preprocess_time:.2f} seconds")

    # --- Measure generate time ---
    start_generate = time.time()

    generate(wan_animate, preprocess_dir, output_video_path, rc_bool)

    generate_time = time.time() - start_generate
    print(f"Generate took {generate_time:.2f} seconds")

    # --- Total time ---
    total_time = preprocess_time + generate_time
    print(f"Total time: {total_time:.2f} seconds")

    return output_video_path

def animate_scene(input_video, edited_frame, rc_str, session_id=None, progress=gr.Progress(track_tqdm=True)):

    if not input_video:
        raise gr.Error("Please provide a video")

    if not edited_frame:
        raise gr.Error("Please provide an image")

    if session_id is None:
        session_id = uuid.uuid4().hex

    rc_bool = rc_mapping[rc_str]

    output_dir = os.path.join(os.environ["PROCESSED_RESULTS"], session_id)
    os.makedirs(output_dir, exist_ok=True)

    input_audio_path = os.path.join(output_dir, 'input_audio.wav')

    extract_audio_from_video_ffmpeg(input_video, input_audio_path)

    output_video_path = _animate(input_video, edited_frame, rc_bool, session_id, progress)

    final_video_path = os.path.join(output_dir, 'final_result.mp4')

    preprocess_dir = os.path.join(output_dir, "preprocess_dir")
    pose_video = os.path.join(preprocess_dir, 'src_pose.mp4')

    if rc_bool:
        mask_video = os.path.join(preprocess_dir, 'src_mask.mp4')
        bg_video = os.path.join(preprocess_dir, 'src_bg.mp4')
        face_video = os.path.join(preprocess_dir, 'src_face.mp4')
    else:
        mask_video = os.path.join(preprocess_dir, 'src_pose.mp4')
        bg_video = os.path.join(preprocess_dir, 'src_pose.mp4')
        face_video = os.path.join(preprocess_dir, 'src_pose.mp4')

    combine_video_and_audio_ffmpeg(output_video_path, input_audio_path, final_video_path)

    return final_video_path, pose_video, bg_video, mask_video, face_video

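# In retarget mode src_pose.mp4 stands in as a placeholder for the mask/bg/face
# slots, which the preprocess step presumably only produces in replace mode.
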
css = """
#col-container {
    margin: 0 auto;
    max-width: 1600px;
}

#step-column {
    padding: 20px;
    border-radius: 8px;
    box-shadow: var(--card-shadow);
    margin: 10px;
}

#col-showcase {
    margin: 0 auto;
    max-width: 1100px;
}

.button-gradient {
    background: linear-gradient(45deg, rgb(255, 65, 108), rgb(255, 75, 43), rgb(255, 155, 0), rgb(255, 65, 108)) 0% 0% / 400% 400%;
    border: none;
    padding: 14px 28px;
    font-size: 16px;
    font-weight: bold;
    color: white;
    border-radius: 10px;
    cursor: pointer;
    transition: 0.3s ease-in-out;
    animation: 2s linear 0s infinite normal none running gradientAnimation;
    box-shadow: rgba(255, 65, 108, 0.6) 0px 4px 10px;
}

.toggle-container {
    display: inline-flex;
    background-color: #ffd6ff;  /* light pink background */
    border-radius: 9999px;
    padding: 4px;
    position: relative;
    width: fit-content;
    font-family: sans-serif;
}

.toggle-container input[type="radio"] {
    display: none;
}

.toggle-container label {
    position: relative;
    z-index: 2;
    flex: 1;
    text-align: center;
    font-weight: 700;
    color: #4b2ab5;  /* dark purple text for unselected */
    padding: 6px 22px;
    border-radius: 9999px;
    cursor: pointer;
    transition: color 0.25s ease;
}

/* Moving highlight */
.toggle-highlight {
    position: absolute;
    top: 4px;
    left: 4px;
    width: calc(50% - 4px);
    height: calc(100% - 8px);
    background-color: #4b2ab5;  /* dark purple background */
    border-radius: 9999px;
    transition: transform 0.25s ease;
    z-index: 1;
}

/* When "True" is checked */
#true:checked ~ label[for="true"] {
    color: #ffd6ff;  /* light pink text */
}

/* When "False" is checked */
#false:checked ~ label[for="false"] {
    color: #ffd6ff;  /* light pink text */
}

/* Move highlight to right side when False is checked */
#false:checked ~ .toggle-highlight {
    transform: translateX(100%);
}
"""

def start_session(request: gr.Request):

    return request.session_hash

def cleanup(request: gr.Request):

    sid = request.session_hash

    if sid:
        d1 = os.path.join(os.environ["PROCESSED_RESULTS"], sid)
        shutil.rmtree(d1, ignore_errors=True)

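# Each browser session works in its own directory under PROCESSED_RESULTS,
# keyed by Gradio's session_hash; demo.unload(cleanup) removes it when the
# client disconnects.
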
with gr.Blocks(css=css, title="Wan 2.2 Animate --replace", theme=gr.themes.Ocean()) as demo:

    session_state = gr.State()
    demo.load(start_session, outputs=[session_state])

    with gr.Column(elem_id="col-container"):
        with gr.Row():
            gr.HTML(
                """
                <div style="text-align: center;">
                    <p style="font-size:16px; display: inline; margin: 0;">
                        <strong>Wan2.2-Animate-14B</strong>
                    </p>
                    <a href="https://huggingface.co/Wan-AI/Wan2.2-Animate-14B" style="display: inline-block; vertical-align: middle; margin-left: 0.5em;">
                        [Model]
                    </a>
                    <div style="text-align: center;">
                        <p style="font-size:16px; display: inline; margin: 0;">
                            HF Space By:
                        </p>
                        <a href="https://huggingface.co/alexnasa" style="display: inline-block; vertical-align: middle; margin-left: 0.5em;">
                            <img src="https://img.shields.io/badge/🤗-Follow Me-yellow.svg">
                        </a>
                    </div>
                </div>
                """
            )
        with gr.Row():
            with gr.Column(elem_id="step-column"):
                gr.HTML("""
                    <div>
                        <span style="font-size: 24px;">1. Upload a Video</span><br>
                    </div>
                """)
                input_video = gr.Video(label="Input Video", height=512)

            with gr.Column(elem_id="step-column"):
                gr.HTML("""
                    <div>
                        <span style="font-size: 24px;">2. Upload a Ref Image</span><br>
                    </div>
                """)
                edited_frame = gr.Image(label="Ref Image", type="filepath", height=512)
                gr.HTML("""
                    <div>
                        <span style="font-size: 24px;">3. Choose Mode</span><br>
                    </div>
                """)
                replace_character_string = gr.Radio(
                    ["Video → Ref Image", "Video ← Ref Image"], value="Video → Ref Image", show_label=False
                )

            with gr.Column(elem_id="step-column"):
                gr.HTML("""
                    <div>
                        <span style="font-size: 24px;">4. Wan Animate it!</span><br>
                    </div>
                """)
                output_video = gr.Video(label="Edited Video", height=512)

                time_required = gr.Text(value="⌚ Zero GPU Required: --", show_label=False)
                action_button = gr.Button("Wan Animate 🦆", variant='primary', elem_classes="button-gradient")

        with gr.Accordion("Preprocessed Data", open=False, visible=False):
            pose_video = gr.Video(label="Pose Video", height=512)
            bg_video = gr.Video(label="Background Video", height=512)
            face_video = gr.Video(label="Face Video", height=512)
            mask_video = gr.Video(label="Mask Video", height=512)

        with gr.Row():
            with gr.Column(elem_id="col-showcase"):

                gr.Examples(
                    examples=[
                        [
                            "./examples/desi.mp4",
                            "./examples/desi.png",
-                            "Video ← Ref Image"
+                            "Video → Ref Image"
                        ],
                        [
                            "./examples/paul.mp4",
                            "./examples/man.png",
-                            "Video → Ref Image"
+                            "Video ← Ref Image"
                        ],
                    ],
                    inputs=[input_video, edited_frame, replace_character_string],
                    outputs=[output_video, pose_video, bg_video, mask_video, face_video],
                    fn=animate_scene,
                    cache_examples=True,
                )

    action_button.click(fn=animate_scene, inputs=[input_video, edited_frame, replace_character_string, session_state], outputs=[output_video, pose_video, bg_video, mask_video, face_video])

    input_video.upload(preprocess_video, inputs=[input_video, session_state], outputs=[input_video]).then(update_time_required, inputs=[input_video, replace_character_string], outputs=[time_required])
    replace_character_string.change(update_time_required, inputs=[input_video, replace_character_string], outputs=[time_required])

if __name__ == "__main__":
    demo.queue()
    demo.unload(cleanup)
    demo.launch(ssr_mode=False, share=True)