Spaces:

dream2589632147
/

Dream-wan2-2-faster-Pro

Running on Zero

App Files Files

dream2589632147 committed on Nov 2

Commit

60ba488

verified ·

1 Parent(s): 7abbdaa

Update app.py

Browse files

Files changed (1) hide show

app.py +62 -70

app.py CHANGED Viewed

@@ -1,8 +1,6 @@
 import spaces
 import torch
 import os
-import subprocess
-import gradio as gr
 import sys
 # 🌟 إضافة هذا لإزالة تحذير tokenizers
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
@@ -20,7 +18,6 @@ except ImportError as e:
 import tempfile
 import numpy as np
 from PIL import Image
-import random
 import gc
 # (بقية تعريفات الثوابت و MODELS كما هي)
 MODEL_ID = "Wan-AI/Wan2.2-I2V-A14B-Diffusers"
@@ -49,26 +46,6 @@ MIN_FRAMES_MODEL = 8
 MAX_FRAMES_MODEL = 720  # 45 ثانية عند 16 FPS (45 * 16 = 720)
 MIN_DURATION = round(MIN_FRAMES_MODEL / FIXED_FPS, 1)
 MAX_DURATION = round(MAX_FRAMES_MODEL / FIXED_FPS, 1)
-# Load the pipeline
-pipe = WanImageToVideoPipeline.from_pretrained(
-    MODEL_ID,
-    torch_dtype=torch.bfloat16,
-).to('cuda')
-pipe.enable_model_cpu_offload() # 🌟 تحسين: offload إلى CPU لتوفير 40% GPU memory
-# Load LoRA with error handling for key mismatches
-try:
-    pipe.load_lora_weights(
-        "Kijai/WanVideo_comfy",
-        weight_name="Lightx2v/lightx2v_I2V_14B_480p_cfg_step_distill_rank128_bf16.safetensors",
-        adapter_name="lightx2v",
-        low_cpu_mem_usage=True # Helps with memory during load
-    )
-    print("LoRA weights loaded successfully!")
-except Exception as e:
-    print(f"Warning: LoRA load failed (possible key mismatch): {e}")
-    print("Proceeding without LoRA for now.")
-gc.collect() # Free up memory after loads
-torch.cuda.empty_cache()
 # 🌟 وظيفة لتحضير الصورة حسب الـ preset
 def prepare_image(image, preset_key):
     if image is None:
@@ -104,69 +81,84 @@ def prepare_image(image, preset_key):
     image = image.resize((width, height), Image.Resampling.LANCZOS)
     return image
-# 🌟 وظيفة لتوليد الفيديو من الصورة والـ prompt
 @torch.no_grad()
-def generate_video(image, prompt, negative_prompt, num_frames, preset_key, guidance_scale=7.5, num_inference_steps=20):  # Reduced default steps to 20 for faster/less memory
-    if image is None:
-        raise ValueError("No image provided!")
-    prepared_image = prepare_image(image, preset_key)
-    height, width = prepared_image.size[1], prepared_image.size[0]
-    # Clamp num_frames
-    num_frames = max(MIN_FRAMES_MODEL, min(num_frames, MAX_FRAMES_MODEL))
-    # Memory check and cleanup before generation
-    if torch.cuda.is_available():
-        print(f"GPU Memory before generation: {torch.cuda.memory_allocated() / 1024**3:.2f} GB")
-        torch.cuda.empty_cache()
-    video_frames = pipe(
-        prompt=prompt,
-        image=prepared_image,
-        negative_prompt=negative_prompt,
-        num_frames=num_frames,
-        height=height,
-        width=width,
-        guidance_scale=guidance_scale,
-        num_inference_steps=num_inference_steps,
-    ).frames[0]
-    # Export to temporary MP4
-    with tempfile.TemporaryDirectory() as tmpdirname:
-        temp_video_path = os.path.join(tmpdirname, "temp_video.mp4")
-        export_to_video(video_frames, temp_video_path, fps=FIXED_FPS)
-        return temp_video_path
-# 🌟 الوظيفة الرئيسية للتطبيق: توليد فيديو من صورة ونص فقط
-@spaces.GPU # إضافة هذا لتفعيل GPU في Hugging Face Spaces
-def generate_video_only(image, prompt, negative_prompt, num_frames, preset_key):
     try:
-        # توليد الفيديو
         print("Generating video...")
-        final_video = generate_video(image, prompt, negative_prompt, num_frames, preset_key)
         # Cleanup after generation
         gc.collect()
         if torch.cuda.is_available():
             torch.cuda.empty_cache()
-        return final_video, "Success!"
     except torch.cuda.OutOfMemoryError:
-        return None, "Error: Out of GPU memory. Try reducing frames or resolution."
     except Exception as e:
         return None, f"Error: {str(e)}"
 # 🌟 إعداد الواجهة بـ Gradio
 with gr.Blocks(title="Wan2.2 Image-to-Video Generator") as demo:
     gr.Markdown("# 🌟 Wan2.2 I2V Generator")
-    gr.Markdown("Upload an image, add a prompt, and generate a video! Note: For T4 GPU, use <32 frames for best results.")
     with gr.Row():
         with gr.Column(scale=1):
             image_input = gr.Image(type="pil", label="Input Image")
             prompt_input = gr.Textbox(label="Prompt", placeholder="A dynamic scene from the image...", lines=2)
             negative_prompt_input = gr.Textbox(label="Negative Prompt", placeholder="blurry, low quality", lines=1)
-            num_frames_slider = gr.Slider(MIN_FRAMES_MODEL, MAX_FRAMES_MODEL, value=16, step=8, label="Number of Frames (Max 45s at 16 FPS)")  # Default to 16 to avoid OOM
             preset_dropdown = gr.Dropdown(choices=list(DIMENSION_PRESETS.keys()), value="Custom (Default)", label="Output Preset")
             generate_btn = gr.Button("Generate Video", variant="primary")
         with gr.Column(scale=1):
@@ -181,7 +173,7 @@ with gr.Blocks(title="Wan2.2 Image-to-Video Generator") as demo:
     generate_btn.click(
         fn=generate_video_only,
-        inputs=[image_input, prompt_input, negative_prompt_input, num_frames_slider, preset_dropdown],
         outputs=[output_video, status_output]
     )
@@ -189,11 +181,11 @@ with gr.Blocks(title="Wan2.2 Image-to-Video Generator") as demo:
     gr.Examples(
         examples=[
             [
-                None, # No example image; user to upload
                 "The person in the image starts walking towards the camera with a smile.",
                 "static, blurry",
-                16,  # Reduced for example
-                "YouTube Full HD (16:9)"
             ]
         ],
         inputs=[image_input, prompt_input, negative_prompt_input, num_frames_slider, preset_dropdown]

 import spaces
 import torch
 import os
 import sys
 # 🌟 إضافة هذا لإزالة تحذير tokenizers
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
 import tempfile
 import numpy as np
 from PIL import Image
 import gc
 # (بقية تعريفات الثوابت و MODELS كما هي)
 MODEL_ID = "Wan-AI/Wan2.2-I2V-A14B-Diffusers"
 MAX_FRAMES_MODEL = 720  # 45 ثانية عند 16 FPS (45 * 16 = 720)
 MIN_DURATION = round(MIN_FRAMES_MODEL / FIXED_FPS, 1)
 MAX_DURATION = round(MAX_FRAMES_MODEL / FIXED_FPS, 1)
 # 🌟 وظيفة لتحضير الصورة حسب الـ preset
 def prepare_image(image, preset_key):
     if image is None:
     image = image.resize((width, height), Image.Resampling.LANCZOS)
     return image
+# 🌟 الوظيفة الرئيسية للتطبيق: توليد فيديو من صورة ونص فقط (مع lazy loading للنموذج)
+@spaces.GPU(duration=600)  # 10 دقائق timeout للسماح بوقت أطول
 @torch.no_grad()
+def generate_video_only(image, prompt, negative_prompt, num_frames, preset_key, guidance_scale=7.5, num_inference_steps=10):  # Reduced steps to 10
     try:
+        # Lazy load the pipeline inside the function to avoid startup issues
+        print("Loading model...")
+        pipe = WanImageToVideoPipeline.from_pretrained(
+            MODEL_ID,
+            torch_dtype=torch.bfloat16,
+        ).to('cuda')
+        pipe.enable_model_cpu_offload()  # Offload to CPU for memory savings
+        # Optional: Load LoRA if possible
+        try:
+            pipe.load_lora_weights(
+                "Kijai/WanVideo_comfy",
+                weight_name="Lightx2v/lightx2v_I2V_14B_480p_cfg_step_distill_rank128_bf16.safetensors",
+                adapter_name="lightx2v",
+                low_cpu_mem_usage=True
+            )
+            print("LoRA weights loaded successfully!")
+        except Exception as e:
+            print(f"Warning: LoRA load failed: {e}")
+            print("Proceeding without LoRA.")
+        # Memory cleanup before generation
+        gc.collect()
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
+            print(f"GPU Memory before generation: {torch.cuda.memory_allocated() / 1024**3:.2f} GB")
+        # Prepare image and generate
+        prepared_image = prepare_image(image, preset_key)
+        height, width = prepared_image.size[1], prepared_image.size[0]
+        num_frames = max(MIN_FRAMES_MODEL, min(num_frames, MAX_FRAMES_MODEL))
         print("Generating video...")
+        video_frames = pipe(
+            prompt=prompt,
+            image=prepared_image,
+            negative_prompt=negative_prompt,
+            num_frames=num_frames,
+            height=height,
+            width=width,
+            guidance_scale=guidance_scale,
+            num_inference_steps=num_inference_steps,
+        ).frames[0]
+        # Export to temporary MP4
+        with tempfile.TemporaryDirectory() as tmpdirname:
+            temp_video_path = os.path.join(tmpdirname, "temp_video.mp4")
+            export_to_video(video_frames, temp_video_path, fps=FIXED_FPS)
         # Cleanup after generation
+        del pipe  # Delete to free memory
         gc.collect()
         if torch.cuda.is_available():
             torch.cuda.empty_cache()
+        return temp_video_path, "Success! Video generated."
     except torch.cuda.OutOfMemoryError:
+        return None, "Error: Out of GPU memory. Try fewer frames (e.g., 8) or lower resolution."
     except Exception as e:
         return None, f"Error: {str(e)}"
 # 🌟 إعداد الواجهة بـ Gradio
 with gr.Blocks(title="Wan2.2 Image-to-Video Generator") as demo:
     gr.Markdown("# 🌟 Wan2.2 I2V Generator")
+    gr.Markdown("Upload an image, add a prompt, and generate a video! **Tip: Start with 8 frames on free T4 GPU to avoid timeouts.**")
     with gr.Row():
         with gr.Column(scale=1):
             image_input = gr.Image(type="pil", label="Input Image")
             prompt_input = gr.Textbox(label="Prompt", placeholder="A dynamic scene from the image...", lines=2)
             negative_prompt_input = gr.Textbox(label="Negative Prompt", placeholder="blurry, low quality", lines=1)
+            num_frames_slider = gr.Slider(MIN_FRAMES_MODEL, 32, value=8, step=8, label="Number of Frames (Start low to test)")  # Limited to 32 max for free tier
             preset_dropdown = gr.Dropdown(choices=list(DIMENSION_PRESETS.keys()), value="Custom (Default)", label="Output Preset")
+            steps_slider = gr.Slider(5, 20, value=10, step=5, label="Inference Steps (Lower = Faster)")
             generate_btn = gr.Button("Generate Video", variant="primary")
         with gr.Column(scale=1):
     generate_btn.click(
         fn=generate_video_only,
+        inputs=[image_input, prompt_input, negative_prompt_input, num_frames_slider, preset_dropdown, gr.State(7.5), steps_slider],  # Added steps
         outputs=[output_video, status_output]
     )
     gr.Examples(
         examples=[
             [
+                None,  # No example image; user to upload
                 "The person in the image starts walking towards the camera with a smile.",
                 "static, blurry",
+                8,
+                "Custom (Default)"
             ]
         ],
         inputs=[image_input, prompt_input, negative_prompt_input, num_frames_slider, preset_dropdown]