Spaces:

Taf2023
/

super-bold-space

Runtime error

App Files Files Community

super-bold-space / app.py

Taf2023

Deploy Gradio app with multiple files

792b77a verified 2 months ago

raw

history blame contribute delete

12.7 kB

	import gradio as gr
	import spaces
	import torch
	from diffusers import DiffusionPipeline
	import numpy as np
	from PIL import Image
	import tempfile
	import os
	from moviepy.editor import ImageSequenceClip, AudioFileClip
	import soundfile as sf
	from transformers import pipeline
	import time
	from typing import List, Tuple, Optional
	import json

	from config import Config
	from utils import VideoGenerator, AudioGenerator, ImageGenerator

	# Module-level generator singletons shared by the request handlers below.
	# The constructors run left to right: image, then audio, then video.
	image_gen, audio_gen, video_gen = (
	    ImageGenerator(),
	    AudioGenerator(),
	    VideoGenerator(),
	)

	@spaces.GPU(duration=1500)
	def compile_transformer():
	    """Ahead-of-time compile the image pipeline's transformer.

	    Runs the image pipeline once on a throwaway prompt inside
	    ``spaces.aoti_capture`` so the arguments passed to the transformer are
	    recorded, exports the transformer with those recorded ``args`` and
	    ``kwargs`` through ``torch.export.export``, and hands the exported
	    program to ``spaces.aoti_compile``.

	    Returns:
	        Whatever ``spaces.aoti_compile`` returns for the exported program.
	    """
	    # The prompt text is arbitrary; the call only exists to trigger one
	    # transformer invocation that the capture context can record.
	    with spaces.aoti_capture(image_gen.pipe.transformer) as captured:
	        image_gen.pipe("test compilation prompt")

	    program = torch.export.export(
	        image_gen.pipe.transformer,
	        args=captured.args,
	        kwargs=captured.kwargs,
	    )
	    compiled = spaces.aoti_compile(program)
	    return compiled

	# Compile during startup: these statements run at import time, before the
	# UI is built. compile_transformer() is the @spaces.GPU(duration=1500)
	# function defined above; its result is kept in the module-level name
	# `compiled_transformer` and then handed to spaces.aoti_apply together with
	# the pipeline's transformer.
	# NOTE(review): any exception raised here aborts module import, so the app
	# will not start if compilation fails - confirm that is the intended policy.
	print("Compiling AI models for optimal performance...")
	compiled_transformer = compile_transformer()
	# presumably swaps the compiled artifact into the live transformer - verify
	# against the `spaces` AoT documentation.
	spaces.aoti_apply(compiled_transformer, image_gen.pipe.transformer)
	print("✅ Models compiled successfully!")

	@spaces.GPU(duration=120)
	def generate_video(
	    prompt: str,
	    duration: int,
	    fps: int,
	    audio_type: str,
	    voice_gender: str,
	    music_style: str,
	    num_images: int,
	    image_size: int,
	    motion_strength: float,
	    progress=gr.Progress()
	) -> str:
	    """
	    Generate a video from text prompt with AI-generated images and audio

	    The pipeline is: generate ``num_images`` still images from slightly
	    varied prompts, generate narration and/or music, expand the stills into
	    motion frames, and compose the final video file.

	    Args:
	        prompt: Text description for the video content
	        duration: Duration of the video in seconds
	        fps: Frames per second for the video
	        audio_type: Type of audio to generate (narration/music/both)
	        voice_gender: Gender for voice narration
	        music_style: Style of background music
	        num_images: Number of unique images to generate
	        image_size: Size of generated images
	        motion_strength: Strength of motion between frames
	        progress: Gradio progress tracker used to report pipeline stages

	    Returns:
	        Path to the generated video file

	    Raises:
	        gr.Error: If validation or any generation step fails; the original
	            exception is attached as the cause.
	    """
	    try:
	        progress(0.1, desc="Starting video generation...")

	        # Slider values can arrive as floats; range() and frame arithmetic
	        # below need real integers.
	        duration = int(duration)
	        fps = int(fps)
	        num_images = int(num_images)
	        image_size = int(image_size)

	        if not prompt or not prompt.strip():
	            raise ValueError("prompt must not be empty")
	        if duration <= 0 or fps <= 0 or num_images <= 0 or image_size <= 0:
	            raise ValueError(
	                "duration, fps, num_images and image_size must be positive"
	            )

	        # Calculate timing. Clamp to one frame so a scene never ends up with
	        # zero frames when there are more scenes than total frames.
	        total_frames = duration * fps
	        frames_per_image = max(1, total_frames // num_images)

	        progress(0.2, desc="Generating images...")
	        # Generate images
	        images = []
	        for i in range(num_images):
	            # Slightly vary the prompt for each image
	            varied_prompt = f"{prompt}, frame {i+1}, cinematic lighting"
	            image = image_gen.generate_image(
	                prompt=varied_prompt,
	                size=(image_size, image_size)
	            )
	            images.append(image)
	            # Report the fraction of images that are actually finished
	            # (i + 1), so the bar reaches 0.5 after the last image.
	            progress(
	                0.2 + ((i + 1) / num_images) * 0.3,
	                desc=f"Generated image {i+1}/{num_images}"
	            )

	        progress(0.5, desc="Generating audio...")
	        # Generate audio
	        narration_path = None
	        music_path = None
	        if audio_type in ("narration", "both"):
	            narration_path = audio_gen.generate_narration(
	                text=prompt,
	                gender=voice_gender,
	                duration=duration
	            )

	        if audio_type in ("music", "both"):
	            music_path = audio_gen.generate_music(
	                style=music_style,
	                duration=duration
	            )

	        if narration_path and music_path:
	            # Mix narration and music
	            audio_path = audio_gen.mix_audio(narration_path, music_path)
	        else:
	            # Only one track (or none) is available; use it as-is rather
	            # than handing a missing path to the mixer.
	            audio_path = narration_path or music_path

	        progress(0.7, desc="Creating video frames...")
	        # Create video frames with motion
	        video_frames = video_gen.create_motion_frames(
	            images=images,
	            frames_per_image=frames_per_image,
	            motion_strength=motion_strength
	        )

	        progress(0.9, desc="Composing final video...")
	        # Create video
	        video_path = video_gen.create_video(
	            frames=video_frames,
	            fps=fps,
	            audio_path=audio_path,
	            duration=duration
	        )

	        progress(1.0, desc="Video generation complete!")
	        return video_path

	    except gr.Error:
	        # Already a user-facing error; do not wrap it a second time.
	        raise
	    except Exception as e:
	        # Surface the failure in the UI while keeping the original traceback
	        # chained for server-side debugging.
	        raise gr.Error(f"Error generating video: {str(e)}") from e

	@spaces.GPU(duration=60)
	def generate_sample_image(prompt: str, style: str) -> Image.Image:
	    """Render one 512x512 preview image for ``prompt`` in the given ``style``.

	    The style name and fixed quality keywords are appended to the prompt
	    before it is passed to the shared image generator.
	    """
	    preview_size = (512, 512)
	    full_prompt = f"{prompt}, {style} style, high quality, detailed"
	    preview = image_gen.generate_image(prompt=full_prompt, size=preview_size)
	    return preview

	def create_demo():
	    """Create the Gradio demo interface.

	    Builds a two-tab ``gr.Blocks`` app: a "Generate Video" tab wired to
	    ``generate_video`` and an "Image Preview" tab wired to
	    ``generate_sample_image``.

	    Returns:
	        The constructed ``gr.Blocks`` instance (not yet launched).
	    """

	    with gr.Blocks(
	        title="AI Video Generator",
	        theme=gr.themes.Soft(),
	        css="""
	        .gradio-container {
	            max-width: 1200px !important;
	        }
	        .header-text {
	            text-align: center;
	            margin-bottom: 2rem;
	        }
	        .preview-box {
	            border: 2px dashed #ccc;
	            border-radius: 10px;
	            padding: 20px;
	            text-align: center;
	        }
	        """
	    ) as demo:

	        gr.HTML("""
	        <div class="header-text">
	            <h1>🎬 AI Video Generator</h1>
	            <p>Create stunning videos from text prompts using AI-powered image and audio generation</p>
	            <p><a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank">Built with anycoder</a></p>
	        </div>
	        """)

	        with gr.Tabs():
	            with gr.TabItem("🎥 Generate Video"):
	                with gr.Row():
	                    with gr.Column(scale=2):
	                        gr.Markdown("### 📝 Video Description")
	                        prompt_input = gr.Textbox(
	                            label="Enter your video concept",
	                            placeholder="A serene landscape with mountains and a lake at sunset...",
	                            lines=3,
	                            value="A beautiful forest with sunlight filtering through the trees, birds flying, peaceful nature scene"
	                        )

	                        gr.Markdown("### ⚙️ Video Settings")
	                        with gr.Row():
	                            duration_slider = gr.Slider(
	                                minimum=5,
	                                maximum=30,
	                                value=10,
	                                step=1,
	                                label="Duration (seconds)"
	                            )
	                            fps_slider = gr.Slider(
	                                minimum=12,
	                                maximum=30,
	                                value=24,
	                                step=1,
	                                label="FPS"
	                            )

	                        with gr.Row():
	                            num_images_slider = gr.Slider(
	                                minimum=3,
	                                maximum=10,
	                                value=5,
	                                step=1,
	                                label="Number of Scenes"
	                            )
	                            image_size_slider = gr.Slider(
	                                minimum=256,
	                                maximum=768,
	                                value=512,
	                                step=128,
	                                label="Image Size"
	                            )

	                        motion_slider = gr.Slider(
	                            minimum=0.1,
	                            maximum=1.0,
	                            value=0.3,
	                            step=0.1,
	                            label="Motion Strength"
	                        )

	                    with gr.Column(scale=1):
	                        gr.Markdown("### 🎵 Audio Settings")
	                        audio_type_radio = gr.Radio(
	                            choices=["narration", "music", "both"],
	                            value="both",
	                            label="Audio Type"
	                        )

	                        voice_radio = gr.Radio(
	                            choices=["male", "female"],
	                            value="female",
	                            label="Voice Gender"
	                        )

	                        music_dropdown = gr.Dropdown(
	                            choices=["ambient", "cinematic", "upbeat", "peaceful", "dramatic"],
	                            value="peaceful",
	                            label="Music Style"
	                        )

	                        generate_btn = gr.Button(
	                            "🎬 Generate Video",
	                            variant="primary",
	                            size="lg"
	                        )

	                with gr.Column():
	                    # Both outputs start hidden and are revealed by the
	                    # first step of the generate-button chain below.
	                    video_output = gr.Video(
	                        label="Generated Video",
	                        visible=False
	                    )

	                    status_text = gr.Textbox(
	                        label="Status",
	                        visible=False,
	                        interactive=False
	                    )

	            with gr.TabItem("🖼️ Image Preview"):
	                gr.Markdown("### Preview image generation before creating the full video")

	                with gr.Row():
	                    preview_prompt = gr.Textbox(
	                        label="Test Prompt",
	                        placeholder="Enter a prompt to test image generation...",
	                        value="A majestic dragon flying over a castle"
	                    )

	                with gr.Row():
	                    style_dropdown = gr.Dropdown(
	                        choices=["photorealistic", "anime", "oil painting", "watercolor", "3D render"],
	                        value="photorealistic",
	                        label="Art Style"
	                    )
	                    preview_btn = gr.Button("Generate Preview", variant="secondary")

	                preview_image = gr.Image(
	                    label="Image Preview",
	                    type="pil",
	                    elem_classes=["preview-box"]
	                )

	        # The same component order is used by the examples table and the
	        # generate button; it must match generate_video's positional
	        # parameters.
	        video_inputs = [
	            prompt_input, duration_slider, fps_slider,
	            audio_type_radio, voice_radio, music_dropdown,
	            num_images_slider, image_size_slider, motion_slider
	        ]

	        # Example prompts
	        gr.Markdown("### 💡 Example Prompts")
	        gr.Examples(
	            examples=[
	                ["A futuristic city with flying cars and neon lights at night", 15, 24, "both", "female", "cinematic", 5, 512, 0.5],
	                ["A peaceful beach with waves crashing and palm trees swaying", 10, 24, "music", "male", "peaceful", 4, 512, 0.3],
	                ["A magical forest with glowing mushrooms and fairy lights", 12, 24, "both", "female", "ambient", 6, 512, 0.4],
	                ["A bustling marketplace in ancient Rome", 8, 24, "narration", "male", "dramatic", 4, 512, 0.6],
	            ],
	            inputs=video_inputs,
	            outputs=[video_output],
	            fn=generate_video,
	        )

	        # Event handlers
	        # One ordered chain instead of three independent click listeners:
	        #   1. reveal the status box (with the "starting" text) and the
	        #      video player,
	        #   2. run the generation,
	        #   3. report completion only if step 2 succeeded (.success), so a
	        #      failed run is never followed by a "complete" message.
	        generate_btn.click(
	            fn=lambda: (
	                gr.Textbox(
	                    value="Starting video generation... This may take a few minutes.",
	                    visible=True
	                ),
	                gr.Video(visible=True),
	            ),
	            outputs=[status_text, video_output]
	        ).then(
	            fn=generate_video,
	            inputs=video_inputs,
	            outputs=[video_output],
	            show_progress="full"
	        ).success(
	            fn=lambda: "Video generation complete! You can now download your video.",
	            outputs=[status_text]
	        )

	        preview_btn.click(
	            fn=generate_sample_image,
	            inputs=[preview_prompt, style_dropdown],
	            outputs=[preview_image]
	        )

	    return demo

	if __name__ == "__main__":
	    demo = create_demo()
	    # `show_tips` is no longer a parameter of Blocks.launch() in current
	    # Gradio releases; passing it raises TypeError before the server
	    # starts, so it is intentionally not passed here.
	    demo.launch(
	        share=True,
	        show_error=True,
	    )