matthewkram committed on
Commit 5f8a662 · verified · 1 Parent(s): 776c2db

Update app.py

Files changed (1)
  1. app.py +99 -103
app.py CHANGED
@@ -1,142 +1,138 @@
-import streamlit as st
 import torch
 from diffusers import StableVideoDiffusionPipeline
 from PIL import Image
 import numpy as np
 import cv2
-from io import BytesIO
-from diffusers.utils import export_to_video
 import tempfile
-import time
-
-@st.cache_resource
-def load_model():
-    model_name = "stabilityai/stable-video-diffusion-img2vid-xt"
-    pipe = StableVideoDiffusionPipeline.from_pretrained(
-        model_name,
-        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,  # FP16 on GPU for speed
-        variant="fp16"
-    )
-    device = "cuda" if torch.cuda.is_available() else "cpu"
-    pipe.to(device)
-    st.info(f"Модель загружена на {device.upper()}. Если CPU — процесс будет очень медленным!")
-    return pipe
-
-def predict(ref_img_bytes, video_bytes, model_id, model, progress_bar, status_text):
-    pipe = load_model()
-
-    ref_image = Image.open(BytesIO(ref_img_bytes)).convert("RGB").resize((576, 320))
-
-    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_video_file:
-        temp_video_file.write(video_bytes)
-        temp_video_path = temp_video_file.name
-
-    cap = cv2.VideoCapture(temp_video_path)
-    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
-    cap.release()
-    motion_hint = f" with dynamic motion from {frame_count} frames"
-
-    num_frames = 25 if model == "wan-pro" else 14
-    num_steps = 25 if model == "wan-pro" else 15
-
-    noise_aug_strength = 0.02
-    if model_id == "wan2.2-animate-mix":
-        noise_aug_strength = 0.1
-
-    def step_callback(step: int, timestep: int, latents: torch.FloatTensor):
-        progress = (step + 1) / num_steps
-        progress_bar.progress(progress)
-        status_text.text(f"Шаг {step + 1}/{num_steps} ({int(progress * 100)}%). Время на шаг: ~{int(time.time() - start_time)} сек")
-        return latents
-
-    generator = torch.Generator(device="cuda" if torch.cuda.is_available() else "cpu").manual_seed(42)
-    start_time = time.time()  # for per-step timing
-    output = pipe(
-        ref_image,
-        num_inference_steps=num_steps,
-        num_frames=num_frames,
-        generator=generator,
-        decode_chunk_size=2,
-        noise_aug_strength=noise_aug_strength,
-        callback_on_step_end=step_callback
-    ).frames[0]
-
-    with tempfile.NamedTemporaryFile(suffix=".mp4") as temp_output:
-        export_to_video(output, temp_output.name, fps=7)
-        temp_output.seek(0)
-        output_bytes = temp_output.read()
-
-    return output_bytes, "SUCCEEDED" + motion_hint
-
-st.title("Wan2.2-Animate (Local No API)")
-st.markdown("""
-Wan2.2-Animate: Unified Character Animation and Replacement with Holistic Replication
-Local version without API (SVD Proxy)
-Tongyi Lab, Alibaba
-📄Paper 💻GitHub 🤗HF Model
-""")
-
-with st.expander("Usage Instructions (инструкции)", expanded=False):
-    st.markdown("""
-‼️Usage (использования) Wan-Animate supports two modes:
-
-* Move Mode: animate the character in input image with movements from the input video
-
-* Mix Mode: replace the character in input video with the character in input image
-
-Wan-Animate supports two modes:
-
-* Move Mode: Use the movements extracted from the input video to drive the character in the input image
-
-* Mix Mode: Use the character in the input image to replace the character in the input video
-
-Currently, the following restrictions apply to inputs:
-
-* Video file size: Less than 200MB
-
-* Video resolution: The shorter side must be greater than 200, and the longer side must be less than 2048
-
-* Video duration: 2s to 30s
-
-* Video aspect ratio: 1:3 to 3:1
-
-* Video formats: mp4, avi, mov
-
-* Image file size: Less than 5MB
-
-* Image resolution: The shorter side must be greater than 200, and the longer side must be less than 4096
-
-* Image formats: jpg, png, jpeg, webp, bmp
-
-Currently, the inference quality has two variants. You can use our open-source code for more flexible configuration.
-
-* wan-pro: 25fps, 720p
-
-* wan-std: 15fps, 720p
-""")
-
-ref_img = st.file_uploader("Reference Image (изображение)", type=["jpg", "png", "jpeg", "webp", "bmp"])
-video = st.file_uploader("Template Video (шаблонное видео)", type=["mp4", "avi", "mov"])
-
-col1, col2 = st.columns(2)
-with col1:
-    model_id = st.selectbox("Mode (режим)", ["wan2.2-animate-move", "wan2.2-animate-mix"])
-with col2:
-    model = st.selectbox("Inference Quality (качество)", ["wan-pro", "wan-std"])
-
-if st.button("Generate Video (генерировать)"):
-    if ref_img and video:
-        progress_bar = st.progress(0)
-        status_text = st.empty()
-        with st.spinner("Генерация... (на CPU это медленно)"):
-            try:
-                output_bytes, status = predict(ref_img.read(), video.read(), model_id, model, progress_bar, status_text)
-                st.video(output_bytes)
-                st.success(status)
-            except Exception as e:
-                st.error(f"Failed: {str(e)}")
-            finally:
-                progress_bar.empty()
-                status_text.empty()
-    else:
-        st.error("Загрузите изображение и видео!")
+import os
+import sys
+import uuid
+import shutil
+import time
+import gradio as gr
 import torch
 from diffusers import StableVideoDiffusionPipeline
 from PIL import Image
 import numpy as np
 import cv2
 import tempfile
+from diffusers.utils import export_to_video
+
+class WanAnimateApp:
+    def __init__(self):
+        model_name = "stabilityai/stable-video-diffusion-img2vid-xt"
+        dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+        self.pipe = StableVideoDiffusionPipeline.from_pretrained(
+            model_name,
+            torch_dtype=dtype,
+            variant="fp16"
+        )
+        device = "cuda" if torch.cuda.is_available() else "cpu"
+        self.pipe.to(device)
+        gr.Info(f"Модель на {device.upper()}. Если CPU — переключись на GPU в Settings!")
+
+    def predict(self, ref_img, video, model_id, model, progress=gr.Progress()):
+        if ref_img is None or video is None:
+            return None, "Upload both image and video."
+
+        progress(0, desc="Подготовка...")
+        ref_image = Image.fromarray(ref_img).convert("RGB").resize((576, 320))
+
+        cap = cv2.VideoCapture(video)
+        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+        cap.release()
+        motion_hint = f" with dynamic motion from {frame_count} frames"
+
+        num_frames = 25 if model == "wan-pro" else 14
+        num_steps = 25 if model == "wan-pro" else 15
+
+        noise_aug_strength = 0.02
+        if model_id == "wan2.2-animate-mix":
+            noise_aug_strength = 0.1
+
+        generator = torch.Generator(device="cuda" if torch.cuda.is_available() else "cpu").manual_seed(42)
+        start_time = time.time()
+        output = self.pipe(
+            ref_image,
+            num_inference_steps=num_steps,
+            num_frames=num_frames,
+            generator=generator,
+            decode_chunk_size=2,
+            noise_aug_strength=noise_aug_strength,
+            callback_on_step_end=lambda step, timestep, latents: progress((step + 1) / num_steps, desc=f"Шаг {step + 1}/{num_steps}. Время: {int(time.time() - start_time)} сек")
+        ).frames[0]
+
+        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as temp_video:
+            export_to_video(output, temp_video.name, fps=7)
+
+        return temp_video.name, "SUCCEEDED" + motion_hint
+
+def start_app():
+    os.makedirs("/tmp/gradio", exist_ok=True)
+
+    app = WanAnimateApp()
+    with gr.Blocks(title="Wan2.2-Animate (Local No API)") as demo:
+        gr.HTML("""
+Wan2.2-Animate: Unified Character Animation and Replacement with Holistic Replication
+Local version without API (SVD Proxy)
+Tongyi Lab, Alibaba
+📄Paper 💻GitHub 🤗HF Model
+""")
+
+        with gr.Accordion("Usage Instructions (инструкции)", open=False):
+            gr.HTML("""
+‼️Usage (использования) Wan-Animate supports two modes:
+
+* Move Mode: animate the character in input image with movements from the input video
+
+* Mix Mode: replace the character in input video with the character in input image
+
+Wan-Animate supports two modes:
+
+* Move Mode: Use the movements extracted from the input video to drive the character in the input image
+
+* Mix Mode: Use the character in the input image to replace the character in the input video
+
+Currently, the following restrictions apply to inputs:
+
+* Video file size: Less than 200MB
+
+* Video resolution: The shorter side must be greater than 200, and the longer side must be less than 2048
+
+* Video duration: 2s to 30s
+
+* Video aspect ratio: 1:3 to 3:1
+
+* Video formats: mp4, avi, mov
+
+* Image file size: Less than 5MB
+
+* Image resolution: The shorter side must be greater than 200, and the longer side must be less than 4096
+
+* Image formats: jpg, png, jpeg, webp, bmp
+
+Currently, the inference quality has two variants. You can use our open-source code for more flexible configuration.
+
+* wan-pro: 25fps, 720p
+
+* wan-std: 15fps, 720p
+""")
+
+        with gr.Row():
+            with gr.Column():
+                ref_img = gr.Image(label="Reference Image (изображение)", type="numpy", sources=["upload"])
+                video = gr.Video(label="Template Video (шаблонное видео)", sources=["upload"])
+                with gr.Row():
+                    model_id = gr.Dropdown(label="Mode (режим)", choices=["wan2.2-animate-move", "wan2.2-animate-mix"], value="wan2.2-animate-move")
+                    model = gr.Dropdown(label="Inference Quality (качество)", choices=["wan-pro", "wan-std"], value="wan-pro")
+                run_button = gr.Button("Generate Video (генерировать)")
+
+            with gr.Column():
+                output_video = gr.Video(label="Output Video (результат)")
+                output_status = gr.Textbox(label="Status (статус)")
+
+        run_button.click(
+            fn=app.predict,
+            inputs=[ref_img, video, model_id, model],
+            outputs=[output_video, output_status]
+        )
+
+    demo.queue(default_concurrency_limit=1)
+    demo.launch(server_name="0.0.0.0", server_port=7860)
+
+if __name__ == "__main__":
+    start_app()
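
A note on the progress callback in the new predict(): recent diffusers releases document callback_on_step_end as receiving (pipe, step, timestep, callback_kwargs) and expecting the kwargs dict back, so the three-argument lambda above may need adjusting depending on the installed version. A minimal sketch of a conforming callback, assuming that convention applies to StableVideoDiffusionPipeline as well (an assumption, not something this commit confirms); make_progress_callback is a hypothetical helper:

# Sketch: progress callback matching the (pipe, step, timestep, callback_kwargs) -> dict
# convention used by recent diffusers pipelines. Whether StableVideoDiffusionPipeline
# accepts this exact signature depends on the installed diffusers version.
import time

def make_progress_callback(progress, num_steps):
    start_time = time.time()

    def on_step_end(pipe, step, timestep, callback_kwargs):
        elapsed = int(time.time() - start_time)
        progress((step + 1) / num_steps, desc=f"Step {step + 1}/{num_steps}, {elapsed}s elapsed")
        # Return the kwargs dict unchanged so the pipeline can continue.
        return callback_kwargs

    return on_step_end

# Hypothetical usage inside predict(), replacing the lambda:
# output = self.pipe(..., callback_on_step_end=make_progress_callback(progress, num_steps)).frames[0]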
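The usage instructions list input limits (video under 200MB, 2s to 30s, shorter side over 200, aspect ratio 1:3 to 3:1; image under 5MB) that neither version enforces in code. A minimal pre-flight check along those lines could look like the sketch below; validate_inputs is a hypothetical helper that assumes file paths for both inputs, and the thresholds are taken directly from the instructions text:

# Sketch: validate the limits quoted in the usage instructions before running inference.
# Not part of app.py; error messages and helper name are illustrative.
import os
import cv2

def validate_inputs(video_path: str, image_path: str) -> list[str]:
    errors = []

    # Video file size: less than 200MB
    if os.path.getsize(video_path) > 200 * 1024 * 1024:
        errors.append("Video larger than 200MB")

    cap = cv2.VideoCapture(video_path)
    w = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
    h = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
    fps = cap.get(cv2.CAP_PROP_FPS) or 1
    frames = cap.get(cv2.CAP_PROP_FRAME_COUNT)
    cap.release()

    # Resolution: shorter side greater than 200, longer side less than 2048
    if min(w, h) <= 200 or max(w, h) >= 2048:
        errors.append("Video resolution outside the 200..2048 range")

    # Duration: 2s to 30s
    duration = frames / fps
    if not (2 <= duration <= 30):
        errors.append(f"Video duration {duration:.1f}s outside 2-30s")

    # Aspect ratio: 1:3 to 3:1
    if h and not (1 / 3 <= w / h <= 3):
        errors.append("Video aspect ratio outside 1:3..3:1")

    # Image file size: less than 5MB
    if os.path.getsize(image_path) > 5 * 1024 * 1024:
        errors.append("Image larger than 5MB")

    return errors

In the Gradio version the reference image arrives as a numpy array rather than a path, so such a check would either need gr.Image(type="filepath") or separate size checks on the array; that trade-off is not addressed by this commit.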