matthewkram committed on
Commit 5f8a662 · verified · 1 Parent(s): 776c2db

Update app.py

Files changed (1)
  1. app.py +99 -103
app.py CHANGED
@@ -1,142 +1,138 @@
-import streamlit as st
 import torch
 from diffusers import StableVideoDiffusionPipeline
 from PIL import Image
 import numpy as np
 import cv2
-from io import BytesIO
-from diffusers.utils import export_to_video
 import tempfile
-import time
-
-@st.cache_resource
-def load_model():
-    model_name = "stabilityai/stable-video-diffusion-img2vid-xt"
-    pipe = StableVideoDiffusionPipeline.from_pretrained(
-        model_name,
-        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,  # FP16 on GPU for speed
-        variant="fp16"
-    )
-    device = "cuda" if torch.cuda.is_available() else "cpu"
-    pipe.to(device)
-    st.info(f"Модель загружена на {device.upper()}. Если CPU — процесс будет очень медленным!")
-    return pipe
-
-def predict(ref_img_bytes, video_bytes, model_id, model, progress_bar, status_text):
-    pipe = load_model()
-
-    ref_image = Image.open(BytesIO(ref_img_bytes)).convert("RGB").resize((576, 320))
-
-    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_video_file:
-        temp_video_file.write(video_bytes)
-        temp_video_path = temp_video_file.name
-
-    cap = cv2.VideoCapture(temp_video_path)
-    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
-    cap.release()
-    motion_hint = f" with dynamic motion from {frame_count} frames"
-
-    num_frames = 25 if model == "wan-pro" else 14
-    num_steps = 25 if model == "wan-pro" else 15
-
-    noise_aug_strength = 0.02
-    if model_id == "wan2.2-animate-mix":
-        noise_aug_strength = 0.1
-
-    def step_callback(step: int, timestep: int, latents: torch.FloatTensor):
-        progress = (step + 1) / num_steps
-        progress_bar.progress(progress)
-        status_text.text(f"Шаг {step + 1}/{num_steps} ({int(progress * 100)}%). Время на шаг: ~{int(time.time() - start_time)} сек")
-        return latents
-
-    generator = torch.Generator(device="cuda" if torch.cuda.is_available() else "cpu").manual_seed(42)
-    start_time = time.time()  # for per-step timing
-    output = pipe(
-        ref_image,
-        num_inference_steps=num_steps,
-        num_frames=num_frames,
-        generator=generator,
-        decode_chunk_size=2,
-        noise_aug_strength=noise_aug_strength,
-        callback_on_step_end=step_callback
-    ).frames[0]
-
-    with tempfile.NamedTemporaryFile(suffix=".mp4") as temp_output:
-        export_to_video(output, temp_output.name, fps=7)
-        temp_output.seek(0)
-        output_bytes = temp_output.read()
-
-    return output_bytes, "SUCCEEDED" + motion_hint
-
-st.title("Wan2.2-Animate (Local No API)")
-st.markdown("""
-Wan2.2-Animate: Unified Character Animation and Replacement with Holistic Replication
-Local version without API (SVD Proxy)
-Tongyi Lab, Alibaba
-📄Paper 💻GitHub 🤗HF Model
-""")
-
-with st.expander("Usage Instructions (инструкции)", expanded=False):
-    st.markdown("""
-‼️Usage (использования) Wan-Animate supports two modes:
-
-* Move Mode: animate the character in input image with movements from the input video
-
-* Mix Mode: replace the character in input video with the character in input image
-
-Wan-Animate supports two modes:
-
-* Move Mode: Use the movements extracted from the input video to drive the character in the input image
-
-* Mix Mode: Use the character in the input image to replace the character in the input video
-
-Currently, the following restrictions apply to inputs:
-
-* Video file size: Less than 200MB
-
-* Video resolution: The shorter side must be greater than 200, and the longer side must be less than 2048
-
-* Video duration: 2s to 30s
-
-* Video aspect ratio: 1:3 to 3:1
-
-* Video formats: mp4, avi, mov
-
-* Image file size: Less than 5MB
-
-* Image resolution: The shorter side must be greater than 200, and the longer side must be less than 4096
-
-* Image formats: jpg, png, jpeg, webp, bmp
-
-Currently, the inference quality has two variants. You can use our open-source code for more flexible configuration.
-
-* wan-pro: 25fps, 720p
-
-* wan-std: 15fps, 720p
-""")
-
-ref_img = st.file_uploader("Reference Image (изображение)", type=["jpg", "png", "jpeg", "webp", "bmp"])
-video = st.file_uploader("Template Video (шаблонное видео)", type=["mp4", "avi", "mov"])
-
-col1, col2 = st.columns(2)
-with col1:
-    model_id = st.selectbox("Mode (режим)", ["wan2.2-animate-move", "wan2.2-animate-mix"])
-with col2:
-    model = st.selectbox("Inference Quality (качество)", ["wan-pro", "wan-std"])
-
-if st.button("Generate Video (генерировать)"):
-    if ref_img and video:
-        progress_bar = st.progress(0)
-        status_text = st.empty()
-        with st.spinner("Генерация... (на CPU это медленно)"):
-            try:
-                output_bytes, status = predict(ref_img.read(), video.read(), model_id, model, progress_bar, status_text)
-                st.video(output_bytes)
-                st.success(status)
-            except Exception as e:
-                st.error(f"Failed: {str(e)}")
-            finally:
-                progress_bar.empty()
-                status_text.empty()
-    else:
-        st.error("Загрузите изображение и видео!")
+import os
+import sys
+import uuid
+import shutil
+import time
+import gradio as gr
 import torch
 from diffusers import StableVideoDiffusionPipeline
 from PIL import Image
 import numpy as np
 import cv2
 import tempfile
+from diffusers.utils import export_to_video
+
+class WanAnimateApp:
+    def __init__(self):
+        model_name = "stabilityai/stable-video-diffusion-img2vid-xt"
+        dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+        self.pipe = StableVideoDiffusionPipeline.from_pretrained(
+            model_name,
+            torch_dtype=dtype,
+            variant="fp16"
+        )
+        device = "cuda" if torch.cuda.is_available() else "cpu"
+        self.pipe.to(device)
+        gr.Info(f"Модель на {device.upper()}. Если CPU — переключись на GPU в Settings!")
+
+    def predict(self, ref_img, video, model_id, model, progress=gr.Progress()):
+        if ref_img is None or video is None:
+            return None, "Upload both image and video."
+
+        progress(0, desc="Подготовка...")
+        ref_image = Image.fromarray(ref_img).convert("RGB").resize((576, 320))
+
+        cap = cv2.VideoCapture(video)
+        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+        cap.release()
+        motion_hint = f" with dynamic motion from {frame_count} frames"
+
+        num_frames = 25 if model == "wan-pro" else 14
+        num_steps = 25 if model == "wan-pro" else 15
+
+        noise_aug_strength = 0.02
+        if model_id == "wan2.2-animate-mix":
+            noise_aug_strength = 0.1
+
+        generator = torch.Generator(device="cuda" if torch.cuda.is_available() else "cpu").manual_seed(42)
+        start_time = time.time()
+        output = self.pipe(
+            ref_image,
+            num_inference_steps=num_steps,
+            num_frames=num_frames,
+            generator=generator,
+            decode_chunk_size=2,
+            noise_aug_strength=noise_aug_strength,
+            callback_on_step_end=lambda step, timestep, latents: progress((step + 1) / num_steps, desc=f"Шаг {step + 1}/{num_steps}. Время: {int(time.time() - start_time)} сек")
+        ).frames[0]
+
+        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as temp_video:
+            export_to_video(output, temp_video.name, fps=7)
+
+        return temp_video.name, "SUCCEEDED" + motion_hint
+
+def start_app():
+    os.makedirs("/tmp/gradio", exist_ok=True)
+
+    app = WanAnimateApp()
+    with gr.Blocks(title="Wan2.2-Animate (Local No API)") as demo:
+        gr.HTML("""
+Wan2.2-Animate: Unified Character Animation and Replacement with Holistic Replication
+Local version without API (SVD Proxy)
+Tongyi Lab, Alibaba
+📄Paper 💻GitHub 🤗HF Model
+""")
+
+        with gr.Accordion("Usage Instructions (инструкции)", open=False):
+            gr.HTML("""
+‼️Usage (использования) Wan-Animate supports two modes:
+
+* Move Mode: animate the character in input image with movements from the input video
+
+* Mix Mode: replace the character in input video with the character in input image
+
+Wan-Animate supports two modes:
+
+* Move Mode: Use the movements extracted from the input video to drive the character in the input image
+
+* Mix Mode: Use the character in the input image to replace the character in the input video
+
+Currently, the following restrictions apply to inputs:
+
+* Video file size: Less than 200MB
+
+* Video resolution: The shorter side must be greater than 200, and the longer side must be less than 2048
+
+* Video duration: 2s to 30s
+
+* Video aspect ratio: 1:3 to 3:1
+
+* Video formats: mp4, avi, mov
+
+* Image file size: Less than 5MB
+
+* Image resolution: The shorter side must be greater than 200, and the longer side must be less than 4096
+
+* Image formats: jpg, png, jpeg, webp, bmp
+
+Currently, the inference quality has two variants. You can use our open-source code for more flexible configuration.
+
+* wan-pro: 25fps, 720p
+
+* wan-std: 15fps, 720p
+""")
+
+        with gr.Row():
+            with gr.Column():
+                ref_img = gr.Image(label="Reference Image (изображение)", type="numpy", sources=["upload"])
+                video = gr.Video(label="Template Video (шаблонное видео)", sources=["upload"])
+                with gr.Row():
+                    model_id = gr.Dropdown(label="Mode (режим)", choices=["wan2.2-animate-move", "wan2.2-animate-mix"], value="wan2.2-animate-move")
+                    model = gr.Dropdown(label="Inference Quality (качество)", choices=["wan-pro", "wan-std"], value="wan-pro")
+                run_button = gr.Button("Generate Video (генерировать)")
+
+            with gr.Column():
+                output_video = gr.Video(label="Output Video (результат)")
+                output_status = gr.Textbox(label="Status (статус)")
+
+        run_button.click(
+            fn=app.predict,
+            inputs=[ref_img, video, model_id, model],
+            outputs=[output_video, output_status]
+        )
+
+    demo.queue(default_concurrency_limit=1)
+    demo.launch(server_name="0.0.0.0", server_port=7860)
+
+if __name__ == "__main__":
+    start_app()
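
A note on the progress callback in the new predict(): recent diffusers releases document callback_on_step_end as receiving (pipe, step, timestep, callback_kwargs) and expecting the kwargs dict back, so the three-argument lambda above may need adjusting depending on the installed version. A minimal sketch of a conforming callback, assuming that convention applies to StableVideoDiffusionPipeline as well (an assumption, not something this commit confirms); make_progress_callback is a hypothetical helper:

# Sketch: progress callback matching the (pipe, step, timestep, callback_kwargs) -> dict
# convention used by recent diffusers pipelines. Whether StableVideoDiffusionPipeline
# accepts this exact signature depends on the installed diffusers version.
import time

def make_progress_callback(progress, num_steps):
    start_time = time.time()

    def on_step_end(pipe, step, timestep, callback_kwargs):
        elapsed = int(time.time() - start_time)
        progress((step + 1) / num_steps, desc=f"Step {step + 1}/{num_steps}, {elapsed}s elapsed")
        # Return the kwargs dict unchanged so the pipeline can continue.
        return callback_kwargs

    return on_step_end

# Hypothetical usage inside predict(), replacing the lambda:
# output = self.pipe(..., callback_on_step_end=make_progress_callback(progress, num_steps)).frames[0]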
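The usage instructions list input limits (video under 200MB, 2s to 30s, shorter side over 200, aspect ratio 1:3 to 3:1; image under 5MB) that neither version enforces in code. A minimal pre-flight check along those lines could look like the sketch below; validate_inputs is a hypothetical helper that assumes file paths for both inputs, and the thresholds are taken directly from the instructions text:

# Sketch: validate the limits quoted in the usage instructions before running inference.
# Not part of app.py; error messages and helper name are illustrative.
import os
import cv2

def validate_inputs(video_path: str, image_path: str) -> list[str]:
    errors = []

    # Video file size: less than 200MB
    if os.path.getsize(video_path) > 200 * 1024 * 1024:
        errors.append("Video larger than 200MB")

    cap = cv2.VideoCapture(video_path)
    w = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
    h = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
    fps = cap.get(cv2.CAP_PROP_FPS) or 1
    frames = cap.get(cv2.CAP_PROP_FRAME_COUNT)
    cap.release()

    # Resolution: shorter side greater than 200, longer side less than 2048
    if min(w, h) <= 200 or max(w, h) >= 2048:
        errors.append("Video resolution outside the 200..2048 range")

    # Duration: 2s to 30s
    duration = frames / fps
    if not (2 <= duration <= 30):
        errors.append(f"Video duration {duration:.1f}s outside 2-30s")

    # Aspect ratio: 1:3 to 3:1
    if h and not (1 / 3 <= w / h <= 3):
        errors.append("Video aspect ratio outside 1:3..3:1")

    # Image file size: less than 5MB
    if os.path.getsize(image_path) > 5 * 1024 * 1024:
        errors.append("Image larger than 5MB")

    return errors

In the Gradio version the reference image arrives as a numpy array rather than a path, so such a check would either need gr.Image(type="filepath") or separate size checks on the array; that trade-off is not addressed by this commit.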