Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import spaces | |
| import torch | |
| from diffusers import DiffusionPipeline | |
| import numpy as np | |
| from PIL import Image | |
| import tempfile | |
| import os | |
| from moviepy.editor import ImageSequenceClip, AudioFileClip | |
| import soundfile as sf | |
| from transformers import pipeline | |
| import time | |
| from typing import List, Tuple, Optional | |
| import json | |
| from config import Config | |
| from utils import VideoGenerator, AudioGenerator, ImageGenerator | |
# Module-level singletons shared by every request handler below.
# Constructed in the same order as before: image, audio, then video.
image_gen, audio_gen, video_gen = (
    ImageGenerator(),
    AudioGenerator(),
    VideoGenerator(),
)
@spaces.GPU(duration=1500)
def compile_transformer():
    """Ahead-of-time compile the image pipeline's transformer.

    The pipeline is run once on a throwaway prompt while
    ``spaces.aoti_capture`` records the positional and keyword arguments the
    transformer is called with. Those captured inputs are then used to export
    the module with ``torch.export.export`` and the exported program is
    compiled with ``spaces.aoti_compile``.

    The function is wrapped in ``spaces.GPU`` because both the example
    pipeline call and the compilation need a GPU; on ZeroGPU hardware a GPU
    is only attached while a ``spaces.GPU``-decorated function is running.
    The long ``duration`` leaves room for compilation, which is much slower
    than a normal inference call.

    Returns:
        The compiled artifact returned by ``spaces.aoti_compile``, suitable
        for passing to ``spaces.aoti_apply``.
    """
    # Run one real inference so the capture context sees genuine inputs.
    with spaces.aoti_capture(image_gen.pipe.transformer) as call:
        image_gen.pipe("test compilation prompt")

    exported = torch.export.export(
        image_gen.pipe.transformer,
        args=call.args,
        kwargs=call.kwargs,
    )
    return spaces.aoti_compile(exported)
# Compile once at import time so every later request reuses the compiled
# transformer instead of paying the compilation cost per call.
print("Compiling AI models for optimal performance...")
compiled_transformer = compile_transformer()
# Patch the compiled artifact into the live pipeline's transformer.
spaces.aoti_apply(compiled_transformer, image_gen.pipe.transformer)
print("✅ Models compiled successfully!")
def generate_video(
    prompt: str,
    duration: int,
    fps: int,
    audio_type: str,
    voice_gender: str,
    music_style: str,
    num_images: int,
    image_size: int,
    motion_strength: float,
    progress=gr.Progress()
) -> str:
    """
    Generate a video from text prompt with AI-generated images and audio

    The work happens in four stages, each reported through ``progress``:
    image generation, audio generation, motion-frame creation and final
    video composition.

    Args:
        prompt: Text description for the video content
        duration: Duration of the video in seconds
        fps: Frames per second for the video
        audio_type: Type of audio to generate (narration/music/both)
        voice_gender: Gender for voice narration
        music_style: Style of background music
        num_images: Number of unique images to generate
        image_size: Size of generated images (used for both width and height)
        motion_strength: Strength of motion between frames
        progress: Gradio progress tracker used to report stage updates

    Returns:
        Path to the generated video file

    Raises:
        gr.Error: If any stage fails, or if ``duration``, ``fps`` or
            ``num_images`` is not positive. The original exception is
            attached as the cause.
    """
    try:
        progress(0.1, desc="Starting video generation...")

        # UI sliders may hand over floats; range() and the frame arithmetic
        # below need real integers.
        duration = int(duration)
        fps = int(fps)
        num_images = int(num_images)
        image_size = int(image_size)
        if duration <= 0 or fps <= 0 or num_images <= 0:
            raise ValueError(
                "duration, fps and num_images must all be positive"
            )

        # Calculate timing. Every image gets at least one frame, even when
        # there are more images than total frames.
        total_frames = duration * fps
        frames_per_image = max(1, total_frames // num_images)

        progress(0.2, desc="Generating images...")
        # Generate images
        images = []
        for i in range(num_images):
            # Slightly vary the prompt for each image
            varied_prompt = f"{prompt}, frame {i+1}, cinematic lighting"
            image = image_gen.generate_image(
                prompt=varied_prompt,
                size=(image_size, image_size)
            )
            images.append(image)
            # Image i+1 is finished at this point, so report (i + 1) of
            # num_images; the image stage spans 0.2 -> 0.5 overall.
            progress(
                0.2 + ((i + 1) / num_images) * 0.3,
                desc=f"Generated image {i+1}/{num_images}"
            )

        progress(0.5, desc="Generating audio...")
        # Generate audio
        audio_path = None
        if audio_type in ("narration", "both"):
            audio_path = audio_gen.generate_narration(
                text=prompt,
                gender=voice_gender,
                duration=duration
            )
        if audio_type in ("music", "both"):
            music_path = audio_gen.generate_music(
                style=music_style,
                duration=duration
            )
            if audio_path and audio_type == "both":
                # Mix narration and music
                audio_path = audio_gen.mix_audio(audio_path, music_path)
            elif not audio_path:
                # Music only, or narration produced nothing usable.
                audio_path = music_path

        progress(0.7, desc="Creating video frames...")
        # Create video frames with motion
        video_frames = video_gen.create_motion_frames(
            images=images,
            frames_per_image=frames_per_image,
            motion_strength=motion_strength
        )

        progress(0.9, desc="Composing final video...")
        # Create video
        video_path = video_gen.create_video(
            frames=video_frames,
            fps=fps,
            audio_path=audio_path,
            duration=duration
        )

        progress(1.0, desc="Video generation complete!")
        return video_path
    except Exception as e:
        # Surface the failure in the UI while keeping the original traceback
        # attached for the server logs.
        raise gr.Error(f"Error generating video: {str(e)}") from e
def generate_sample_image(prompt: str, style: str) -> Image.Image:
    """Render a single 512x512 preview image for a prompt in a given style.

    The style name and fixed quality keywords are appended to the prompt
    before it is handed to the shared image generator.
    """
    preview_size = (512, 512)
    full_prompt = f"{prompt}, {style} style, high quality, detailed"
    preview = image_gen.generate_image(prompt=full_prompt, size=preview_size)
    return preview
def create_demo():
    """Create the Gradio demo interface

    Builds a two-tab ``gr.Blocks`` app: a "Generate Video" tab wired to
    ``generate_video`` and an "Image Preview" tab wired to
    ``generate_sample_image``, plus a list of example prompts.

    Returns:
        The constructed ``gr.Blocks`` instance (not yet launched).
    """
    # NOTE(review): the nesting of the layout containers below was
    # reconstructed from statement order -- confirm against the intended
    # design.
    with gr.Blocks(
        title="AI Video Generator",
        theme=gr.themes.Soft(),
        css="""
        .gradio-container {
            max-width: 1200px !important;
        }
        .header-text {
            text-align: center;
            margin-bottom: 2rem;
        }
        .preview-box {
            border: 2px dashed #ccc;
            border-radius: 10px;
            padding: 20px;
            text-align: center;
        }
        """
    ) as demo:
        gr.HTML("""
        <div class="header-text">
            <h1>🎬 AI Video Generator</h1>
            <p>Create stunning videos from text prompts using AI-powered image and audio generation</p>
            <p><a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank">Built with anycoder</a></p>
        </div>
        """)

        with gr.Tabs():
            with gr.TabItem("🎥 Generate Video"):
                with gr.Row():
                    with gr.Column(scale=2):
                        gr.Markdown("### 📝 Video Description")
                        prompt_input = gr.Textbox(
                            label="Enter your video concept",
                            placeholder="A serene landscape with mountains and a lake at sunset...",
                            lines=3,
                            value="A beautiful forest with sunlight filtering through the trees, birds flying, peaceful nature scene"
                        )

                        gr.Markdown("### ⚙️ Video Settings")
                        with gr.Row():
                            duration_slider = gr.Slider(
                                minimum=5,
                                maximum=30,
                                value=10,
                                step=1,
                                label="Duration (seconds)"
                            )
                            fps_slider = gr.Slider(
                                minimum=12,
                                maximum=30,
                                value=24,
                                step=1,
                                label="FPS"
                            )
                        with gr.Row():
                            num_images_slider = gr.Slider(
                                minimum=3,
                                maximum=10,
                                value=5,
                                step=1,
                                label="Number of Scenes"
                            )
                            image_size_slider = gr.Slider(
                                minimum=256,
                                maximum=768,
                                value=512,
                                step=128,
                                label="Image Size"
                            )
                        motion_slider = gr.Slider(
                            minimum=0.1,
                            maximum=1.0,
                            value=0.3,
                            step=0.1,
                            label="Motion Strength"
                        )

                    with gr.Column(scale=1):
                        gr.Markdown("### 🎵 Audio Settings")
                        audio_type_radio = gr.Radio(
                            choices=["narration", "music", "both"],
                            value="both",
                            label="Audio Type"
                        )
                        voice_radio = gr.Radio(
                            choices=["male", "female"],
                            value="female",
                            label="Voice Gender"
                        )
                        music_dropdown = gr.Dropdown(
                            choices=["ambient", "cinematic", "upbeat", "peaceful", "dramatic"],
                            value="peaceful",
                            label="Music Style"
                        )
                        generate_btn = gr.Button(
                            "🎬 Generate Video",
                            variant="primary",
                            size="lg"
                        )

                with gr.Column():
                    # Both outputs start hidden and are revealed by the first
                    # step of the generate-button chain below.
                    video_output = gr.Video(
                        label="Generated Video",
                        visible=False
                    )
                    status_text = gr.Textbox(
                        label="Status",
                        visible=False,
                        interactive=False
                    )

            with gr.TabItem("🖼️ Image Preview"):
                gr.Markdown("### Preview image generation before creating the full video")
                with gr.Row():
                    preview_prompt = gr.Textbox(
                        label="Test Prompt",
                        placeholder="Enter a prompt to test image generation...",
                        value="A majestic dragon flying over a castle"
                    )
                with gr.Row():
                    style_dropdown = gr.Dropdown(
                        choices=["photorealistic", "anime", "oil painting", "watercolor", "3D render"],
                        value="photorealistic",
                        label="Art Style"
                    )
                    preview_btn = gr.Button("Generate Preview", variant="secondary")
                preview_image = gr.Image(
                    label="Image Preview",
                    type="pil",
                    elem_classes=["preview-box"]
                )

        # Example prompts
        gr.Markdown("### 💡 Example Prompts")
        video_inputs = [
            prompt_input, duration_slider, fps_slider,
            audio_type_radio, voice_radio, music_dropdown,
            num_images_slider, image_size_slider, motion_slider
        ]
        gr.Examples(
            examples=[
                ["A futuristic city with flying cars and neon lights at night", 15, 24, "both", "female", "cinematic", 5, 512, 0.5],
                ["A peaceful beach with waves crashing and palm trees swaying", 10, 24, "music", "male", "peaceful", 4, 512, 0.3],
                ["A magical forest with glowing mushrooms and fairy lights", 12, 24, "both", "female", "ambient", 6, 512, 0.4],
                ["A bustling marketplace in ancient Rome", 8, 24, "narration", "male", "dramatic", 4, 512, 0.6],
            ],
            inputs=video_inputs,
            outputs=[video_output],
            fn=generate_video,
            # Never pre-compute example outputs: caching would run the whole
            # video pipeline once per example while the app is starting.
            cache_examples=False,
        )

        def _announce_start():
            """Reveal the status box and the video player before generation."""
            return (
                gr.Textbox(
                    value="Starting video generation... This may take a few minutes.",
                    visible=True,
                ),
                gr.Video(visible=True),
            )

        # Event handlers
        # One ordered chain instead of several independent click listeners,
        # so the UI updates cannot race with the video result:
        #   1. show the status text and reveal the player,
        #   2. run the generation,
        #   3. report completion only if generation succeeded.
        generate_btn.click(
            fn=_announce_start,
            outputs=[status_text, video_output]
        ).then(
            fn=generate_video,
            inputs=video_inputs,
            outputs=[video_output],
            show_progress="full"
        ).success(
            fn=lambda: "Video generation complete! You can now download your video.",
            outputs=[status_text]
        )

        preview_btn.click(
            fn=generate_sample_image,
            inputs=[preview_prompt, style_dropdown],
            outputs=[preview_image]
        )

    return demo
if __name__ == "__main__":
    demo = create_demo()
    # `show_tips` is intentionally not passed: it is not a `launch()`
    # parameter in current Gradio releases, and passing it raises a
    # TypeError before the server starts.
    demo.launch(
        share=True,
        show_error=True,
    )