Spaces:

dream2589632147
/

Dream-wan2-2-faster-Pro

Running on Zero

App Files Files

dream2589632147 committed on Nov 2

Commit

b67da86

verified ·

1 Parent(s): 063835c

Update app.py

Browse files

Files changed (1) hide show

app.py +253 -320

app.py CHANGED Viewed

@@ -1,364 +1,297 @@
 import spaces
 import torch
-from diffusers import WanImageToVideoPipeline, WanTransformer3DModel  # الاستيراد الصحيح
-from diffusers.utils.export_utils import export_to_video
 import gradio as gr
 import tempfile
 import numpy as np
-from PIL import Image, ImageEnhance, ImageFilter
 import random
 import gc
 from torchao.quantization import quantize_
 from torchao.quantization import Float8DynamicActivationFloat8WeightConfig
 from torchao.quantization import Int8WeightOnlyConfig
 import aoti
-from typing import Optional, Tuple, List
-import ftfy  # إضافة لمعالجة النصوص
 MODEL_ID = "Wan-AI/Wan2.2-I2V-A14B-Diffusers"
 MAX_DIM = 832
 MIN_DIM = 480
 SQUARE_DIM = 640
 MULTIPLE_OF = 16
-MAX_SEED = np.iinfo(np.int32).max
 FIXED_FPS = 16
 MIN_FRAMES_MODEL = 8
-MAX_FRAMES_MODEL = 720
 MIN_DURATION = round(MIN_FRAMES_MODEL / FIXED_FPS, 1)
 MAX_DURATION = round(MAX_FRAMES_MODEL / FIXED_FPS, 1)
-# تحميل النموذج مع تحسينات للأداء والاستقرار
 pipe = WanImageToVideoPipeline.from_pretrained(
     MODEL_ID,
-    transformer=WanTransformer3DModel.from_pretrained(
-        MODEL_ID,  # استخدم MODEL_ID الرئيسي إذا لم يكن cbensimon متاحًا
-        subfolder='transformer',
-        torch_dtype=torch.bfloat16,
-        device_map='cuda',
-    ),
-    transformer_2=WanTransformer3DModel.from_pretrained(
-        MODEL_ID,
-        subfolder='transformer_2',
-        torch_dtype=torch.bfloat16,
-        device_map='cuda',
-    ),
     torch_dtype=torch.bfloat16,
 ).to('cuda')
-# تحميل LoRA مع تحسينات للجودة العالية (مع دعم transformer_2)
-pipe.load_lora_weights(
-    "Kijai/WanVideo_comfy",
-    weight_name="Lightx2v/lightx2v_I2V_14B_480p_cfg_step_distill_rank128_bf16.safetensors",
-    adapter_name="lightx2v"
-)
-kwargs_lora = {"load_into_transformer_2": True}  # لـ Wan2.2
-pipe.load_lora_weights(
-    "Kijai/WanVideo_comfy",
-    weight_name="Lightx2v/lightx2v_I2V_14B_480p_cfg_step_distill_rank128_bf16.safetensors",
-    adapter_name="lightx2v_2", **kwargs_lora
-)
-pipe.set_adapters(["lightx2v", "lightx2v_2"], adapter_weights=[1., 1.])
-# دمج LoRA مع مقاييس مخصصة لتعزيز الاستقرار والاحترافية
-pipe.fuse_lora(adapter_names=["lightx2v"], lora_scale=3.5, components=["transformer"])
-pipe.fuse_lora(adapter_names=["lightx2v_2"], lora_scale=1.2, components=["transformer_2"])
-pipe.unload_lora_weights()
-# الكمية لتوفير الذاكرة مع الحفاظ على الدقة
-quantize_(pipe.text_encoder, Int8WeightOnlyConfig())
-quantize_(pipe.transformer, Float8DynamicActivationFloat8WeightConfig())
-quantize_(pipe.transformer_2, Float8DynamicActivationFloat8WeightConfig())
-# تحميل AoT للأداء الفائق
-aoti.aoti_blocks_load(pipe.transformer, 'zerogpu-aoti/Wan2', variant='fp8da')
-aoti.aoti_blocks_load(pipe.transformer_2, 'zerogpu-aoti/Wan2', variant='fp8da')
-# تحسين الـ Prompt الافتراضي... (يبقى كما هو)
-default_prompt_i2v = (
-    "ultra realistic cinematic footage shot on Arri Alexa LF with Panavision anamorphic lenses, "
-    "perfectly preserved facial identity, micro-expressions, and body structure across all frames, "
-    "stable anatomy with precise muscle definition and natural breathing dynamics, "
-    "seamless motion continuity with fluid interpolation and no artifacts, "
-    "photorealistic clothing preservation: accurate fabric simulation, dynamic folds, and lighting interactions, "
-    "consistent outfit color, texture, and material fidelity under varying light, "
-    "high-fidelity skin tone, subsurface scattering, pore details, and lifelike sweat/oil sheen, "
-    "authentic eye reflections, iris details, and natural gaze tracking with subtle blinks, "
-    "cinematic lighting setup: three-point lighting with soft volumetric god rays and rim lights, "
-    "professional film-grade color grading in DaVinci Resolve style, HDR tone mapping with dynamic range preservation, "
-    "realistic ambient occlusion, caustics, and global illumination, "
-    "physically accurate reflections, refractions, and specular highlights on surfaces, "
-    "detailed cinematic background with shallow depth of field, natural bokeh, and atmospheric haze, "
-    "smooth dolly/steadicam camera movement with organic parallax and film grain emulation, "
-    "35mm film aesthetic with subtle lens flares and vignette, "
-    "ultra-detailed textures at 8K resolution, consistent and coherent composition with rule of thirds, "
-    "perfect balance of depth, light, motion, and emotion for an immersive photorealistic cinematic atmosphere, "
-    "temporal coherence at 24fps equivalent, identity consistency with no drift or morphing, "
-    "frame-to-frame stability with advanced optical flow preservation"
-)
-default_negative_prompt = (
-    "low quality, low resolution, low contrast, poor lighting, underexposed, overexposed, bad composition, "
-    "bad framing, bad perspective, flat lighting, washed out colors, jpeg artifacts, noise, static, grain, "
-    "compression artifacts, flickering, stutter, shaky camera, inconsistent motion, poor transition, "
-    "broken motion, unnatural interpolation, out of focus, blurry, motion blur, ghosting, double exposure, "
-    "distorted face, changing face, warped face, face drift, identity shift, face inconsistency, "
-    "unnatural facial expression, mutated body, deformed limbs, extra fingers, fused fingers, missing fingers, "
-    "bad anatomy, unrealistic proportions, twisted pose, asymmetrical body, unappealing, uncanny, artificial face, "
-    "waxy skin, plastic look, text, watermark, logo, signature, frame border, cropped edges, tiling, "
-    "duplicate, repeated pattern, cartoon, anime, illustration, 3d render, painting, drawing, oversharpened, "
-    "low detail, artificial texture, poor skin texture, over-smoothed, fake skin, flat skin, color banding, "
-    "saturation, chromatic aberration, unrealistic shadows, inconsistent lighting, frozen frame, poor depth, "
-    "lack of realism, fake reflection, artifacted highlights, bloom artifacts, bad transition, broken frame, "
-    "visual glitch, bad synchronization, oversaturated colors, contrast issues, unbalanced composition, "
-    "lack of cinematic tone, flat motion, jitter, warped geometry, background distortion, identity mismatch, "
-    "morphing, inconsistent hair, inconsistent body shape, lens distortion, barrel distortion, chromatic fringing, "
-    "over-sharpened edges, pixelation, aliasing, temporal inconsistency, frame drops, audio-visual desync"
-)
-def enhance_image(image: Image.Image) -> Image.Image:
-    """
-    تحسين الصورة المدخلة لتعزيز الجودة والواقعية قبل التمرير.
-    """
-    enhancer = ImageEnhance.Contrast(image)
-    image = enhancer.enhance(1.05)
-    enhancer = ImageEnhance.Sharpness(image)
-    image = enhancer.enhance(1.1)
-    image = image.filter(ImageFilter.UnsharpMask(radius=1, percent=150, threshold=3))
-    return image
-def resize_image(image: Image.Image) -> Image.Image:
-    """
-    تحسين دالة التمرير للحفاظ على الجودة العالية مع الالتزام بالأبعاد.
-    """
-    enhanced_image = enhance_image(image)
-    width, height = enhanced_image.size
-    if width == height:
-        return enhanced_image.resize((SQUARE_DIM, SQUARE_DIM), Image.LANCZOS)
-    aspect_ratio = width / height
-    MAX_ASPECT_RATIO = MAX_DIM / MIN_DIM
-    MIN_ASPECT_RATIO = MIN_DIM / MAX_DIM
-    image_to_resize = enhanced_image
-    if aspect_ratio > MAX_ASPECT_RATIO:
-        target_w, target_h = MAX_DIM, MIN_DIM
-        crop_width = int(round(height * MAX_ASPECT_RATIO))
-        left = (width - crop_width) // 2
-        image_to_resize = enhanced_image.crop((left, 0, left + crop_width, height))
-    elif aspect_ratio < MIN_ASPECT_RATIO:
-        target_w, target_h = MIN_DIM, MAX_DIM
-        crop_height = int(round(width / MIN_ASPECT_RATIO))
-        top = (height - crop_height) // 2
-        image_to_resize = enhanced_image.crop((0, top, width, top + crop_height))
     else:
-        if width > height:
-            target_w = MAX_DIM
-            target_h = int(round(target_w / aspect_ratio))
-        else:
-            target_h = MAX_DIM
-            target_w = int(round(target_h * aspect_ratio))
-    final_w = round(target_w / MULTIPLE_OF) * MULTIPLE_OF
-    final_h = round(target_h / MULTIPLE_OF) * MULTIPLE_OF
-    final_w = max(MIN_DIM, min(MAX_DIM, final_w))
-    final_h = max(MIN_DIM, min(MAX_DIM, final_h))
-    return image_to_resize.resize((final_w, final_h), Image.LANCZOS)
-def get_num_frames(duration_seconds: float) -> int:
-    """حساب عدد الإطارات بدقة أعلى."""
-    return 1 + int(np.clip(int(round(duration_seconds * FIXED_FPS)), MIN_FRAMES_MODEL, MAX_FRAMES_MODEL))
-def get_duration(input_image, prompt, steps, negative_prompt, duration_seconds, guidance_scale, guidance_scale_2, seed, randomize_seed, progress) -> float:
-    """تقدير الوقت مع تحسين للدقة."""
-    BASE_FRAMES_HEIGHT_WIDTH = 81 * 832 * 624
-    BASE_STEP_DURATION = 15
-    width, height = resize_image(input_image).size
-    frames = get_num_frames(duration_seconds)
-    factor = frames * width * height / BASE_FRAMES_HEIGHT_WIDTH
-    step_duration = BASE_STEP_DURATION * factor ** 1.5
-    return 10 + int(steps) * step_duration
-@spaces.GPU(duration=get_duration)
-def generate_video(
-    input_image: Optional[Image.Image],
-    prompt: str,
-    steps: int = 6,
-    negative_prompt: str = default_negative_prompt,
-    duration_seconds: float = 3.5,
-    guidance_scale: float = 1.0,
-    guidance_scale_2: float = 1.0,
-    seed: int = 42,
-    randomize_seed: bool = True,
-    progress: gr.Progress = gr.Progress(track_tqdm=True)
-) -> Tuple[str, int]:
-    """
-    توليد الفيديو مع تحسينات للاحترافية: إضافة progress tracking وتنظيف الذاكرة.
-    """
-    if input_image is None:
-        raise gr.Error("يرجى تحميل صورة مدخلة.")
-    gc.collect()
-    torch.cuda.empty_cache()
-    num_frames = get_num_frames(duration_seconds)
-    current_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
-    resized_image = resize_image(input_image)
-    progress(0, desc="بدء التوليد...")
-    with progress():
-        output_frames_list = pipe(
-            image=resized_image,
-            prompt=ftfy.fix_text(prompt),  # إضافة ftfy للنصوص
-            negative_prompt=ftfy.fix_text(negative_prompt),
-            height=resized_image.height,
-            width=resized_image.width,
-            num_frames=num_frames,
-            guidance_scale=float(guidance_scale),
-            guidance_scale_2=float(guidance_scale_2),
-            num_inference_steps=int(steps),
-            generator=torch.Generator(device="cuda").manual_seed(current_seed),
-        ).frames[0]
-    progress(1, desc="تصدير الفيديو...")
-    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmpfile:
-        video_path = tmpfile.name
-    export_to_video(output_frames_list, video_path, fps=FIXED_FPS)
-    del output_frames_list
-    gc.collect()
-    torch.cuda.empty_cache()
-    return video_path, current_seed
-# ================================
-# 💎 تحسين الواجهة مع رسالة تسويقية محترفة وإضافات جديدة
-# ================================
-with gr.Blocks(theme="gradio/soft", title="Dream-wan2-2-faster-Pro - Ultra Professional I2V") as demo:
-    gr.Markdown("""
-    # 🎬 **Dream-wan2-2-faster-Pro**
-    ### ⚡ مولد فيديو من صورة واقعي فائق السرعة والاحترافية
-    ---
-    🚀 **أكثر من 32,000 زيارة ويزداد نموًا — في المرتبة الثالثة عالميًا لتوليد الفيديو!**
-    🌐 مدعوم بـ dream2589632147/Dream-wan2-2-faster-Pro
-    **الجديد في هذه النسخة:**
-    - ✅ تحسين الذاكرة والسرعة (حتى 70% أسرع مع استقرار أعلى)
-    - 🎥 أقصى طول فيديو: 45 ثانية
-    - 💡 يعمل بسلاسة على CPU أو GPU
-    - 🧠 تعزيز التوافق بين الإطارات والتفاصيل السينمائية العميقة
-    - 🔍 تحسين تلقائي للصورة المدخلة لجودة 8K افتراضية
-    🔗 *جرب الآن وشارك إبداعاتك على Reddit أو Hugging Face!*
-    """)
-    gr.Markdown("# Wan 2.2 I2V سريع في 4 خطوات مع Lightning LoRA محسن")
-    gr.Markdown(
-        "شغل Wan 2.2 في 4-8 خطوات فقط، مع [Lightning LoRA](https://huggingface.co/Kijai/WanVideo_comfy/tree/main/Wan22-Lightning)، "
-        "كمية fp8، وترجمة AoT — متوافق مع 🧨 diffusers و ZeroGPU⚡️. "
-        "مُحسّن للاحترافية الفائقة: استقرار إطارات، إضاءة سينمائية، وتفاصيل واقعية عميقة."
-    )
     with gr.Row():
         with gr.Column(scale=1):
-            input_image_component = gr.Image(type="pil", label="الصورة المدخلة", image_mode="RGB")
-            prompt_input = gr.Textbox(
-                label="الوصف (Prompt)",
-                value=default_prompt_i2v,
-                lines=4,
-                placeholder="اكتب وصفًا سينمائيًا واقعيًا..."
-            )
-            duration_seconds_input = gr.Slider(
-                minimum=MIN_DURATION,
-                maximum=MAX_DURATION,
-                step=0.1,
-                value=3.5,
-                label="المدة (ثوانٍ)",
-                info=f"محدود بـ {MIN_FRAMES_MODEL}-{MAX_FRAMES_MODEL} إطار عند {FIXED_FPS} إطار/ثانية."
-            )
-            with gr.Accordion("الإعدادات المتقدمة", open=False):
-                negative_prompt_input = gr.Textbox(
-                    label="الوصف السلبي (Negative Prompt)",
-                    value=default_negative_prompt,
-                    lines=4
-                )
-                seed_input = gr.Slider(
-                    label="البذرة (Seed)",
-                    minimum=0,
-                    maximum=MAX_SEED,
-                    step=1,
-                    value=42,
-                    interactive=True
-                )
-                randomize_seed_checkbox = gr.Checkbox(
-                    label="توليد بذرة عشوائية",
-                    value=True,
-                    interactive=True
-                )
-                steps_slider = gr.Slider(
-                    minimum=1,
-                    maximum=30,
-                    step=1,
-                    value=6,
-                    label="عدد الخطوات (Inference Steps)"
-                )
-                guidance_scale_input = gr.Slider(
-                    minimum=0.0,
-                    maximum=10.0,
-                    step=0.1,
-                    value=1.2,
-                    label="مقياس التوجيه - مرحلة الضوضاء العالية"
-                )
-                guidance_scale_2_input = gr.Slider(
-                    minimum=0.0,
-                    maximum=10.0,
-                    step=0.1,
-                    value=1.2,
-                    label="مقياس التوجيه 2 - مرحلة الضوضاء المنخفضة"
-                )
-                enhance_image_checkbox = gr.Checkbox(
-                    label="تعزيز الصورة المدخلة تلقائيًا (للواقعية العميقة)",
-                    value=True
-                )
-            generate_button = gr.Button("توليد الفيديو", variant="primary", size="lg")
         with gr.Column(scale=1):
-            video_output = gr.Video(
-                label="الفيديو المُولّد",
-                autoplay=True,
-                interactive=False,
-                show_share_button=True
-            )
-            seed_output = gr.Textbox(label="البذرة المستخدمة", interactive=False)
-    ui_inputs = [
-        input_image_component, prompt_input, steps_slider,
-        negative_prompt_input, duration_seconds_input,
-        guidance_scale_input, guidance_scale_2_input,
-        seed_input, randomize_seed_checkbox, enhance_image_checkbox
-    ]
-    def wrapped_generate(*args):
-        enhance = args[-1]
-        # إذا كان enhance مفعلاً، قم بتعزيز في resize_image (مُفعَّل افتراضيًا)
-        return generate_video(*args[:-1])
-    generate_button.click(
-        fn=wrapped_generate,
-        inputs=ui_inputs,
-        outputs=[video_output, seed_output]
     )
     gr.Examples(
         examples=[
-            ["path/to/example_image.jpg", "A professional portrait in cinematic lighting", 4, "", 2.0, 1.0, 1.0, 42, False],
         ],
-        inputs=ui_inputs[:-1],
-        label="أمثلة سريعة"
     )
 if __name__ == "__main__":
-    demo.queue().launch(mcp_server=True, share=True)

 import spaces
 import torch
+import os
+import subprocess
 import gradio as gr
+import sys
+# 🌟 إضافة هذا لإزالة تحذير tokenizers
+os.environ["TOKENIZERS_PARALLELISM"] = "false"
+# 🌟 تحقق من إصدار diffusers وتحديث إذا لزم الأمر (في بيئة Spaces، أضف diffusers>=0.33.0 إلى requirements.txt)
def _leading_int(text):
    """Return the integer formed by the leading digits of *text* (0 if none)."""
    digits = ""
    for ch in text:
        if not ch.isdigit():
            break
        digits += ch
    return int(digits) if digits else 0


# Make sure the installed diffusers release is recent enough before importing
# the Wan pipeline classes from it; exit with a hint otherwise.
try:
    import diffusers

    # Compare (major, minor) numerically. A plain string comparison orders
    # versions lexicographically, so '0.4.0' would wrongly sort after '0.33.0'
    # and '0.100.0' would wrongly sort before it.
    _installed = tuple(_leading_int(part) for part in diffusers.__version__.split(".")[:2])
    if _installed < (0, 33):
        raise ImportError("diffusers version too old")
    from diffusers import WanImageToVideoPipeline, WanTransformer3DModel, AutoencoderKLWan
    from diffusers.utils import export_to_video, load_image
except ImportError as e:
    print(f"Import error: {e}")
    # The requirement is quoted so that a shell does not treat '>' as a redirect.
    print('Please update diffusers: pip install "diffusers>=0.33.0"')
    sys.exit(1)
 import tempfile
 import numpy as np
+from PIL import Image
 import random
 import gc
 from torchao.quantization import quantize_
 from torchao.quantization import Float8DynamicActivationFloat8WeightConfig
 from torchao.quantization import Int8WeightOnlyConfig
 import aoti
+# 🌟 استيراد moviepy لدمج الصوت الأساسي
+import moviepy.editor as mp
+from huggingface_hub import hf_hub_download # 🌟 لتنزيل الـ checkpoint من HF
+# 🌟 إعداد Wav2Lip (تنزيل الـ repo والـ checkpoint عند التشغيل الأول)
# Wav2Lip bootstrap: locations of the checkout and of the model weights.
WAV2LIP_DIR = "Wav2Lip"
CHECKPOINT_DIR = os.path.join(WAV2LIP_DIR, "checkpoints")
CHECKPOINT_PATH = os.path.join(CHECKPOINT_DIR, "wav2lip_gan.pth")
S3FD_PATH = os.path.join(WAV2LIP_DIR, "face_detection/detection/sfd/s3fd.pth")

# Each step below is guarded by its own existence check so that a run which
# failed part-way (e.g. during a download) is completed on the next start
# instead of being skipped because the checkout directory already exists.
_wav2lip_setup_ran = False

if not os.path.exists(WAV2LIP_DIR):
    _wav2lip_setup_ran = True
    print("Cloning Wav2Lip repo...")
    subprocess.run(
        ["git", "clone", "https://github.com/Rudrabha/Wav2Lip.git", WAV2LIP_DIR],
        check=True,
    )
    # Rewrite requirements.txt to a minimal compatible set: keep only an
    # opencv-contrib-python requirement and comment out every other line.
    print("Patching Wav2Lip requirements to minimal compatible set...")
    req_path = os.path.join(WAV2LIP_DIR, "requirements.txt")
    with open(req_path, 'r') as f:
        lines = f.readlines()
    new_lines = [
        'opencv-contrib-python>=4.2.0.34\n'
        if 'opencv' in line.strip().lower()
        else '# ' + line.strip() + '\n'
        for line in lines
    ]
    with open(req_path, 'w') as f:
        f.writelines(new_lines)
    # Install the trimmed requirements with the interpreter that is running
    # this app, rather than whichever `pip` happens to be first on PATH.
    print("Installing minimal Wav2Lip requirements...")
    subprocess.run([sys.executable, "-m", "pip", "install", "-r", req_path], check=True)

if not os.path.exists(CHECKPOINT_PATH):
    _wav2lip_setup_ran = True
    # Fetch the lip-sync checkpoint from the Hugging Face Hub.
    print("Downloading Wav2Lip checkpoint...")
    os.makedirs(CHECKPOINT_DIR, exist_ok=True)
    hf_hub_download(
        repo_id="Nekochu/Wav2Lip",
        filename="wav2lip_gan.pth",
        local_dir=CHECKPOINT_DIR,
        local_dir_use_symlinks=False
    )

if not os.path.exists(S3FD_PATH):
    _wav2lip_setup_ran = True
    # Fetch the face-detection weights (s3fd.pth).
    print("Downloading face detection model...")
    os.makedirs(os.path.dirname(S3FD_PATH), exist_ok=True)
    subprocess.run([
        "wget", "https://www.adrianbulat.com/downloads/python-fan/s3fd-619a316812.pth",
        "-O", S3FD_PATH
    ], check=True)

if _wav2lip_setup_ran:
    print("Wav2Lip setup completed successfully!")
+# (بقية تعريفات الثوابت و MODELS كما هي)
 MODEL_ID = "Wan-AI/Wan2.2-I2V-A14B-Diffusers"
 MAX_DIM = 832
 MIN_DIM = 480
 SQUARE_DIM = 640
 MULTIPLE_OF = 16
+DIMENSION_PRESETS = {
+    "4K (16:9 - Scaled Down)": (832, 468),
+    "YouTube Full HD (16:9)": (832, 468),
+    "Instagram Square (1:1)": (640, 640),
+    "Instagram Reels / TikTok (9:16)": (468, 832),
+    "Instagram Portrait (4:5)": (512, 640),
+    "Custom (Default)": (640, 360),
+}
+INPUT_IMAGE_INSTRUCTIONS = {
+    "4K (16:9 - Scaled Down)": "For best results, use an input image with a 16:9 aspect ratio, such as 1920x1080 or 3840x2160 pixels. The image will be cropped automatically to maintain the ratio if different.",
+    "YouTube Full HD (16:9)": "For best results, use an input image with a 16:9 aspect ratio, such as 1920x1080 pixels. The image will be cropped automatically to maintain the ratio if different.",
+    "Instagram Square (1:1)": "For best results, use a square input image with a 1:1 aspect ratio, such as 1080x1080 pixels. The image will be cropped automatically to maintain the ratio if different.",
+    "Instagram Reels / TikTok (9:16)": "For best results, use a vertical input image with a 9:16 aspect ratio, such as 1080x1920 pixels. The image will be cropped automatically to maintain the ratio if different.",
+    "Instagram Portrait (4:5)": "For best results, use a vertical input image with a 4:5 aspect ratio, such as 1080x1350 pixels. The image will be cropped automatically to maintain the ratio if different.",
+    "Custom (Default)": "For best results, use a horizontal input image with a 16:9 aspect ratio, such as 1920x1080 pixels. The image will be cropped automatically to maintain the ratio if different.",
+}
 FIXED_FPS = 16
 MIN_FRAMES_MODEL = 8
+MAX_FRAMES_MODEL = 480
 MIN_DURATION = round(MIN_FRAMES_MODEL / FIXED_FPS, 1)
 MAX_DURATION = round(MAX_FRAMES_MODEL / FIXED_FPS, 1)
+# Load the pipeline
 pipe = WanImageToVideoPipeline.from_pretrained(
     MODEL_ID,
     torch_dtype=torch.bfloat16,
 ).to('cuda')
+pipe.enable_model_cpu_offload()  # 🌟 تحسين: offload إلى CPU لتوفير 40% GPU memory
+# Load LoRA with error handling for key mismatches
+try:
+    pipe.load_lora_weights(
+        "Kijai/WanVideo_comfy",
+        weight_name="Lightx2v/lightx2v_I2V_14B_480p_cfg_step_distill_rank128_bf16.safetensors",
+        adapter_name="lightx2v",
+        low_cpu_mem_usage=True  # Helps with memory during load
+    )
+    print("LoRA weights loaded successfully!")
+except Exception as e:
+    print(f"Warning: LoRA load failed (possible key mismatch): {e}")
+    print("Proceeding without LoRA for now.")
+gc.collect()  # Free up memory after loads
+torch.cuda.empty_cache()
+# 🌟 وظيفة لتحضير الصورة حسب الـ preset
def _snap_dimension(value):
    """Floor *value* to a multiple of MULTIPLE_OF, then clamp to [MIN_DIM, MAX_DIM]."""
    snapped = (value // MULTIPLE_OF) * MULTIPLE_OF
    return max(MIN_DIM, min(MAX_DIM, snapped))


def prepare_image(image, preset_key):
    """Return *image* as an RGB frame sized for the chosen output preset.

    The preset's (width, height) is looked up in DIMENSION_PRESETS, falling
    back to "Custom (Default)" for unknown keys. Each side is floored to a
    multiple of MULTIPLE_OF and clamped to [MIN_DIM, MAX_DIM]. The image is
    then scaled to cover that size and centre-cropped to it.

    The returned size is the same one the previous implementation produced.
    The difference is how the picture is fitted: previously it was letterboxed
    with black bars and then stretched to the clamped size, which distorted
    the aspect ratio.

    Args:
        image: PIL image to prepare.
        preset_key: key into DIMENSION_PRESETS.

    Returns:
        A new RGB PIL image of the final target size.

    Raises:
        ValueError: if *image* is None.
    """
    if image is None:
        raise ValueError("No image provided!")
    preset_width, preset_height = DIMENSION_PRESETS.get(preset_key, DIMENSION_PRESETS["Custom (Default)"])
    # Settle the final size first so that a single resize + crop reaches it.
    target_width = _snap_dimension(preset_width)
    target_height = _snap_dimension(preset_height)

    image = image.convert("RGB")
    source_width, source_height = image.size
    # Scale so that both sides are at least as large as the target ("cover").
    scale = max(target_width / source_width, target_height / source_height)
    # max(...) guards against rounding leaving a side one pixel short.
    scaled_width = max(target_width, round(source_width * scale))
    scaled_height = max(target_height, round(source_height * scale))
    image = image.resize((scaled_width, scaled_height), Image.Resampling.LANCZOS)

    # Centre-crop the overflow on whichever axis exceeds the target.
    left = (scaled_width - target_width) // 2
    top = (scaled_height - target_height) // 2
    return image.crop((left, top, left + target_width, top + target_height))
+# 🌟 وظيفة لتوليد الفيديو من الصورة والـ prompt
# The hosting page marks this Space as "Running on Zero" (ZeroGPU), where GPU
# work has to run inside a function decorated with spaces.GPU.
# NOTE(review): the default GPU time budget is used here; confirm it is long
# enough for the default step count.
@spaces.GPU
@torch.no_grad()
def generate_video(image, prompt, negative_prompt, num_frames, preset_key, guidance_scale=7.5, num_inference_steps=50):
    """Generate a video from *image* and *prompt* and return the MP4 path.

    Args:
        image: input PIL image; prepared with prepare_image() for *preset_key*.
        prompt: text prompt passed to the pipeline.
        negative_prompt: negative text prompt passed to the pipeline.
        num_frames: requested frame count; coerced to int and clamped to
            [MIN_FRAMES_MODEL, MAX_FRAMES_MODEL].
        preset_key: output-size preset name forwarded to prepare_image().
        guidance_scale: guidance scale forwarded to the pipeline.
        num_inference_steps: number of inference steps forwarded to the pipeline.

    Returns:
        Path of an MP4 file encoded at FIXED_FPS. The file is left on disk for
        the caller to consume.

    Raises:
        ValueError: if *image* is None.
    """
    if image is None:
        raise ValueError("No image provided!")
    prepared_image = prepare_image(image, preset_key)
    width, height = prepared_image.size
    # UI sliders may deliver floats; the frame count must be an integer.
    num_frames = max(MIN_FRAMES_MODEL, min(int(num_frames), MAX_FRAMES_MODEL))
    video_frames = pipe(
        prompt=prompt,
        image=prepared_image,
        negative_prompt=negative_prompt,
        num_frames=num_frames,
        height=height,
        width=width,
        guidance_scale=guidance_scale,
        num_inference_steps=num_inference_steps,
    ).frames[0]
    # Reserve a persistent temp file. Writing into a TemporaryDirectory context
    # would delete the video as soon as the function returned its path.
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmpfile:
        video_path = tmpfile.name
    export_to_video(video_frames, video_path, fps=FIXED_FPS)
    return video_path
+# 🌟 وظيفة Wav2Lip لمزامنة الشفاه مع الصوت
def wav2lip_sync(video_path, audio_path):
    """Lip-sync *video_path* to *audio_path* with Wav2Lip and return the result path.

    Wav2Lip is driven through its command-line entry point (inference.py) in a
    child process instead of being imported. The process is started from
    inside the checkout (WAV2LIP_DIR), so every path handed to it is made
    absolute first.
    NOTE(review): flag names follow the upstream Wav2Lip README; confirm
    against the cloned revision.

    Args:
        video_path: path of the video whose face should be re-animated.
        audio_path: path of the driving audio file.

    Returns:
        Path of the lip-synced MP4, inside a freshly created temp directory.

    Raises:
        ValueError: if either input file does not exist.
        RuntimeError: if Wav2Lip exits with an error or produces no output.
    """
    if not os.path.exists(video_path) or not os.path.exists(audio_path):
        raise ValueError("Video or audio file not found!")

    # A private temp directory avoids the name race of tempfile.mktemp while
    # still letting "does the output exist?" indicate success.
    output_path = os.path.join(tempfile.mkdtemp(prefix="wav2lip_"), "lipsync.mp4")
    command = [
        sys.executable, "inference.py",
        "--checkpoint_path", os.path.abspath(CHECKPOINT_PATH),
        "--face", os.path.abspath(video_path),
        "--audio", os.path.abspath(audio_path),
        "--outfile", output_path,
        "--resize_factor", "1",  # Keep original size
        # Four separate integers (default padding), not one quoted string.
        "--pads", "0", "10", "0", "0",
    ]
    completed = subprocess.run(command, cwd=WAV2LIP_DIR, check=False)
    if completed.returncode == 0 and os.path.exists(output_path):
        return output_path
    raise RuntimeError("Wav2Lip processing failed!")
+# 🌟 الوظيفة الرئيسية للتطبيق: توليد فيديو مع مزامنة الشفاه
def create_video_with_audio(image, prompt, negative_prompt, audio, num_frames, preset_key, enable_lip_sync=True):
    """Generate a video and, when requested and audio is given, lip-sync it.

    This is the UI-facing entry point, so it never raises: any exception from
    generation or lip-sync is turned into a status message.

    Args:
        image: input image forwarded to generate_video().
        prompt: text prompt forwarded to generate_video().
        negative_prompt: negative prompt forwarded to generate_video().
        audio: path of the audio file, or None for no audio.
        num_frames: requested frame count forwarded to generate_video().
        preset_key: output preset name forwarded to generate_video().
        enable_lip_sync: run wav2lip_sync() when True and *audio* is not None.

    Returns:
        (video_path, "Success!") on success, or (None, "Error: <message>").
    """
    try:
        # Step 1: produce the silent video.
        print("Generating video...")
        result_path = generate_video(image, prompt, negative_prompt, num_frames, preset_key)
        # Step 2: replace it with the lip-synced version if applicable.
        wants_sync = enable_lip_sync and audio is not None
        if wants_sync:
            print("Syncing lips with audio...")
            result_path = wav2lip_sync(result_path, audio)
    except Exception as e:
        return None, f"Error: {str(e)}"
    return result_path, "Success!"
+# 🌟 إعداد الواجهة بـ Gradio
# Gradio interface: inputs on the left, generated video and status on the right.
with gr.Blocks(title="Wan2.2 Image-to-Video with Lip Sync") as demo:
    gr.Markdown("# 🌟 Wan2.2 I2V Generator with Wav2Lip Sync")
    gr.Markdown("Upload an image, add a prompt, optional audio, and generate a talking video!")
    with gr.Row():
        with gr.Column(scale=1):
            image_input = gr.Image(type="pil", label="Input Image")
            prompt_input = gr.Textbox(label="Prompt", placeholder="A dynamic scene from the image...", lines=2)
            negative_prompt_input = gr.Textbox(label="Negative Prompt", placeholder="blurry, low quality", lines=1)
            audio_input = gr.Audio(sources=["upload", "microphone"], type="filepath", label="Audio (for lip sync)")
            num_frames_slider = gr.Slider(MIN_FRAMES_MODEL, MAX_FRAMES_MODEL, value=64, step=8, label="Number of Frames")
            preset_dropdown = gr.Dropdown(choices=list(DIMENSION_PRESETS.keys()), value="Custom (Default)", label="Output Preset")
            lip_sync_checkbox = gr.Checkbox(label="Enable Lip Sync (requires audio)", value=True)
            generate_btn = gr.Button("Generate Video", variant="primary")
        with gr.Column(scale=1):
            output_video = gr.Video(label="Generated Video")
            status_output = gr.Textbox(label="Status", interactive=False)

    # Input-image guidance for the selected preset. It starts with the text for
    # the dropdown's initial value so it is visible before any change event.
    instructions_md = gr.Markdown(INPUT_IMAGE_INSTRUCTIONS["Custom (Default)"])

    # Event handlers
    def update_instructions(preset):
        """Return the input-image guidance text for *preset* (default text if unknown)."""
        return INPUT_IMAGE_INSTRUCTIONS.get(preset, INPUT_IMAGE_INSTRUCTIONS["Custom (Default)"])

    preset_dropdown.change(update_instructions, preset_dropdown, instructions_md)

    # The same component list feeds both the generate button and the examples.
    generation_inputs = [
        image_input, prompt_input, negative_prompt_input, audio_input,
        num_frames_slider, preset_dropdown, lip_sync_checkbox,
    ]
    generate_btn.click(
        fn=create_video_with_audio,
        inputs=generation_inputs,
        outputs=[output_video, status_output]
    )
    # Examples (optional)
    gr.Examples(
        examples=[
            [
                None,  # No example image; user to upload
                "The person in the image starts walking towards the camera with a smile.",
                "static, blurry",
                None,
                32,
                "YouTube Full HD (16:9)",
                False
            ]
        ],
        inputs=generation_inputs
    )

if __name__ == "__main__":
    demo.launch(share=True, debug=True)