Spaces:

MCP-1st-Birthday
/

AIQuoteClipGenerator

Running

App Files Files Community

ladybug11 committed about 1 month ago

Commit

4908797

1 Parent(s): 850d1aa

update

Browse files

Files changed (1) hide show

modal_video_processing.py +14 -96

modal_video_processing.py CHANGED Viewed

@@ -23,64 +23,40 @@ image = modal.Image.debian_slim(python_version="3.11").pip_install(
     cpu=2,
     memory=2048,
     timeout=180,
-    keep_warm=1,  # Keep 1 container warm to eliminate cold starts!
-    container_idle_timeout=300,  # Keep alive for 5 minutes
 )
 def process_quote_video(video_url: str, quote_text: str, audio_b64: str = None) -> bytes:
     """
-    Process quote video on Modal's fast infrastructure.
-    Downloads video, adds text overlay, optionally adds audio, returns video bytes.
-    Args:
-        video_url: URL of background video
-        quote_text: Quote to overlay
-        audio_b64: Optional base64-encoded audio file
-    Returns:
-        bytes: Processed video file as bytes
     """
     import tempfile
     import requests
-    from moviepy.editor import VideoFileClip, ImageClip, CompositeVideoClip, AudioFileClip
     from PIL import Image, ImageDraw, ImageFont
     import numpy as np
     import time
-    import base64
     start_time = time.time()
-    print(f"🎬 Starting video processing on Modal...")
-    # Download video with streaming
-    print("📥 Downloading video...")
-    download_start = time.time()
     response = requests.get(video_url, stream=True, timeout=30)
     response.raise_for_status()
     temp_video = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4')
     with open(temp_video.name, 'wb') as f:
-        for chunk in response.iter_content(chunk_size=1024*1024):  # 1MB chunks
             f.write(chunk)
-    print(f"✅ Video downloaded in {time.time() - download_start:.1f}s")
     # Load video
-    print("🎥 Loading video...")
-    load_start = time.time()
     video = VideoFileClip(temp_video.name)
-    # Limit video duration to 10 seconds max for faster processing
-    # Instagram quote videos are typically short anyway
     if video.duration > 10:
         video = video.subclip(0, 10)
     w, h = video.size
-    print(f"   Dimensions: {w}x{h}, Duration: {video.duration:.1f}s")
-    print(f"✅ Video loaded in {time.time() - load_start:.1f}s")
-    # Create text overlay using PIL
-    print("✍️ Creating text overlay...")
-    overlay_start = time.time()
     def make_text_frame(t):
         img = Image.new('RGBA', (w, h), (0, 0, 0, 0))
         draw = ImageDraw.Draw(img)
@@ -90,10 +66,7 @@ def process_quote_video(video_url: str, quote_text: str, audio_b64: str = None)
         try:
             font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf", font_size)
         except:
-            try:
-                font = ImageFont.truetype("/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf", font_size)
-            except:
-                font = ImageFont.load_default()
         max_width = int(w * 0.6)
@@ -139,54 +112,18 @@ def process_quote_video(video_url: str, quote_text: str, audio_b64: str = None)
         return np.array(img)
     text_clip = ImageClip(make_text_frame(0), duration=video.duration)
-    print(f"✅ Text overlay created in {time.time() - overlay_start:.1f}s")
     # Composite
-    print("🎨 Compositing video...")
-    composite_start = time.time()
     final_video = CompositeVideoClip([video, text_clip])
-    print(f"✅ Composited in {time.time() - composite_start:.1f}s")
-    # Add audio if provided
-    if audio_b64:
-        print("🎤 Adding voice commentary audio...")
-        audio_start = time.time()
-        try:
-            # Decode base64 audio
-            audio_bytes = base64.b64decode(audio_b64)
-            # Save to temp file
-            temp_audio = tempfile.NamedTemporaryFile(delete=False, suffix='.mp3')
-            with open(temp_audio.name, 'wb') as f:
-                f.write(audio_bytes)
-            # Load audio clip
-            audio_clip = AudioFileClip(temp_audio.name)
-            # Use the shorter duration between video and audio
-            audio_duration = min(audio_clip.duration, final_video.duration)
-            audio_clip = audio_clip.subclip(0, audio_duration)
-            # Set audio on video
-            final_video = final_video.set_audio(audio_clip)
-            print(f"✅ Audio added in {time.time() - audio_start:.1f}s")
-            # Cleanup audio temp file
-            os.unlink(temp_audio.name)
-        except Exception as e:
-            print(f"⚠️ Audio failed: {e}, continuing without audio")
-    # Export with optimized settings
-    print("📦 Exporting video...")
-    export_start = time.time()
     output_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4')
     final_video.write_videofile(
         output_file.name,
         codec='libx264',
         audio_codec='aac',
-        fps=10,  # Lower fps for speed
         preset='ultrafast',
         threads=2,
         verbose=False,
@@ -195,9 +132,7 @@ def process_quote_video(video_url: str, quote_text: str, audio_b64: str = None)
         ffmpeg_params=['-crf', '30', '-g', '30']
     )
-    print(f"✅ Video exported in {time.time() - export_start:.1f}s")
-    # Read video bytes
     with open(output_file.name, 'rb') as f:
         video_bytes = f.read()
@@ -208,23 +143,18 @@ def process_quote_video(video_url: str, quote_text: str, audio_b64: str = None)
     os.unlink(output_file.name)
     total_time = time.time() - start_time
-    print(f"🎉 TOTAL PROCESSING TIME: {total_time:.1f}s")
-    print(f"   Video size: {len(video_bytes) / 1024 / 1024:.2f}MB")
     return video_bytes
-# Expose as web endpoint for easy calling from Gradio
 @app.function(image=image)
 @modal.web_endpoint(method="POST")
 def process_video_endpoint(data: dict):
-    """
-    Web endpoint to process videos with optional audio.
-    Accepts JSON with video_url, quote_text, and optional audio_b64.
-    """
     video_url = data.get("video_url")
     quote_text = data.get("quote_text")
-    audio_b64 = data.get("audio_b64")  # Changed from audio_url
     if not video_url or not quote_text:
         return {"error": "Missing video_url or quote_text"}, 400
@@ -232,7 +162,6 @@ def process_video_endpoint(data: dict):
     try:
         video_bytes = process_quote_video.remote(video_url, quote_text, audio_b64)
-        # Return video bytes as base64
         import base64
         video_b64 = base64.b64encode(video_bytes).decode()
@@ -244,14 +173,3 @@ def process_video_endpoint(data: dict):
     except Exception as e:
         return {"error": str(e)}, 500
-if __name__ == "__main__":
-    # Test locally
-    with app.run():
-        result = process_quote_video.remote(
-            video_url="https://videos.pexels.com/video-files/3843433/3843433-uhd_2732_1440_25fps.mp4",
-            quote_text="Test quote for local testing",
-            audio_b64=None
-        )
-        print(f"Got video: {len(result)} bytes")

     cpu=2,
     memory=2048,
     timeout=180,
+    keep_warm=1,  # Keep 1 container warm
+    container_idle_timeout=300,
 )
 def process_quote_video(video_url: str, quote_text: str, audio_b64: str = None) -> bytes:
     """
+    Process quote video on Modal - FAST version (no audio).
     """
     import tempfile
     import requests
+    from moviepy.editor import VideoFileClip, ImageClip, CompositeVideoClip
     from PIL import Image, ImageDraw, ImageFont
     import numpy as np
     import time
     start_time = time.time()
+    # Download video
     response = requests.get(video_url, stream=True, timeout=30)
     response.raise_for_status()
     temp_video = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4')
     with open(temp_video.name, 'wb') as f:
+        for chunk in response.iter_content(chunk_size=1024*1024):
             f.write(chunk)
     # Load video
     video = VideoFileClip(temp_video.name)
     if video.duration > 10:
         video = video.subclip(0, 10)
     w, h = video.size
+    # Create text overlay
     def make_text_frame(t):
         img = Image.new('RGBA', (w, h), (0, 0, 0, 0))
         draw = ImageDraw.Draw(img)
         try:
             font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf", font_size)
         except:
+            font = ImageFont.load_default()
         max_width = int(w * 0.6)
         return np.array(img)
     text_clip = ImageClip(make_text_frame(0), duration=video.duration)
     # Composite
     final_video = CompositeVideoClip([video, text_clip])
+    # Export - FAST settings
     output_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4')
     final_video.write_videofile(
         output_file.name,
         codec='libx264',
         audio_codec='aac',
+        fps=10,
         preset='ultrafast',
         threads=2,
         verbose=False,
         ffmpeg_params=['-crf', '30', '-g', '30']
     )
+    # Read bytes
     with open(output_file.name, 'rb') as f:
         video_bytes = f.read()
     os.unlink(output_file.name)
     total_time = time.time() - start_time
+    print(f"🎉 Total: {total_time:.1f}s, Size: {len(video_bytes)/1024/1024:.2f}MB")
     return video_bytes
 @app.function(image=image)
 @modal.web_endpoint(method="POST")
 def process_video_endpoint(data: dict):
+    """Web endpoint"""
     video_url = data.get("video_url")
     quote_text = data.get("quote_text")
+    audio_b64 = data.get("audio_b64")
     if not video_url or not quote_text:
         return {"error": "Missing video_url or quote_text"}, 400
     try:
         video_bytes = process_quote_video.remote(video_url, quote_text, audio_b64)
         import base64
         video_b64 = base64.b64encode(video_bytes).decode()
     except Exception as e:
         return {"error": str(e)}, 500