import os

import spaces  # ZeroGPU helper; imported early, before torch, per Spaces guidance
import gradio as gr
import torch
import ftfy
from diffusers import DiffusionPipeline
from diffusers.utils import export_to_video
# Read token and optional model override from environment
token = os.environ.get("HUGGINGFACE_TOKEN")
if not token:
    raise ValueError("Environment variable HUGGINGFACE_TOKEN is not set.")

# Use the Diffusers-ready model repository by default
model_id = os.environ.get("WAN_MODEL_ID", "Wan-AI/Wan2.1-I2V-14B-480P-Diffusers")
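# Example override at launch time (a sketch; the alternate repo id below is
# illustrative and must point to a Diffusers-ready checkpoint):
#   WAN_MODEL_ID="Wan-AI/Wan2.1-I2V-14B-720P-Diffusers" python app.py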
# ZeroGPU: the @spaces.GPU decorator allocates a GPU only while this function
# runs, so the pipeline must be loaded inside it rather than at module scope.
@spaces.GPU
def generate_video(image, prompt, num_frames=16, steps=25, guidance_scale=7.5):
    torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
    pipe = DiffusionPipeline.from_pretrained(
        model_id,
        torch_dtype=torch_dtype,
        trust_remote_code=True,
        token=token,  # "use_auth_token" is deprecated in recent diffusers
    ).to("cuda")
    pipe.enable_attention_slicing()
    # Generate frames; Wan pipelines return them under .frames, not .videos
    output = pipe(
        prompt=prompt,
        image=image,
        num_inference_steps=steps,
        guidance_scale=guidance_scale,
        num_frames=num_frames,
    )
    # Encode the frames to an mp4 file, since gr.Video expects a file path
    return export_to_video(output.frames[0], "output.mp4", fps=16)
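# Note: larger frame counts or step counts can outrun the default ZeroGPU time
# slice; the decorator accepts a duration hint, e.g. @spaces.GPU(duration=120)
# (the value here is illustrative, tune it to your settings).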
# Gradio UI
def main():
    with gr.Blocks() as demo:
        gr.Markdown("# Wan2.1 Image-to-Video Demo (ZeroGPU Edition)")
        with gr.Row():
            img_in = gr.Image(type="pil", label="Input Image")
            txt_p = gr.Textbox(label="Prompt")
        btn = gr.Button("Generate Video")
        out = gr.Video(label="Generated Video")
        btn.click(fn=generate_video, inputs=[img_in, txt_p], outputs=out)
    return demo
if __name__ == "__main__":
    main().launch()
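# Quick local smoke test without the UI (a sketch; assumes a CUDA GPU, a valid
# HUGGINGFACE_TOKEN in the environment, and a test image at "input.jpg"; both
# the image path and the prompt are illustrative):
#
#   from PIL import Image
#   print(generate_video(Image.open("input.jpg"), "a cat walking on grass"))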