Spaces:

MCP-1st-Birthday
/

AIQuoteClipGenerator

Running

App Files Community

ladybug11 committed 14 days ago

Commit

2f051ee

1 Parent(s): 6038041

update

Browse files

Files changed (3) hide show

app.py +80 -30
modal_video_processing.py +16 -15
quote_generator_gemini.py +1 -1

app.py CHANGED Viewed

@@ -3,7 +3,6 @@ import time
 import json
 import shutil
 import random
-import tempfile
 import requests
 import gradio as gr
@@ -147,7 +146,16 @@ def search_pexels_video_tool(style: str, niche: str) -> dict:
     queries = search_strategies.get(niche, {}).get(style, ["aesthetic nature"])
     try:
-        headers = {"Authorization": PEXELS_API_KEY} if PEXELS_API_KEY else {}
         query = random.choice(queries)
         url = (
@@ -162,9 +170,7 @@ def search_pexels_video_tool(style: str, niche: str) -> dict:
             video_files = video.get("video_files", [])
             portrait_videos = [
-                vf
-                for vf in video_files
-                if vf.get("width", 0) < vf.get("height", 0)
             ]
             if portrait_videos:
@@ -203,7 +209,12 @@ def search_pexels_video_tool(style: str, niche: str) -> dict:
 @tool
-def create_quote_video_tool(video_url: str, quote_text: str, output_path: str) -> dict:
     """
     Create a quote video by calling a Modal endpoint that overlays text on a background video.
@@ -211,6 +222,7 @@ def create_quote_video_tool(video_url: str, quote_text: str, output_path: str) -
         video_url: Direct URL of the background video (e.g. from Pexels).
         quote_text: The quote text to be overlaid on the video.
         output_path: Local file path where the resulting video should be saved.
     Returns:
         A dictionary with:
@@ -232,12 +244,13 @@ def create_quote_video_tool(video_url: str, quote_text: str, output_path: str) -
         }
     try:
-        print("🚀 Processing on Modal (fast!)...")
         response = requests.post(
             modal_endpoint,
             json={
                 "video_url": video_url,
                 "quote_text": quote_text,
             },
             timeout=120,
         )
@@ -271,7 +284,7 @@ def create_quote_video_tool(video_url: str, quote_text: str, output_path: str) -
         return {
             "success": True,
             "output_path": output_path,
-            "message": f"Video created via Modal (~{size_mb:.2f}MB).",
         }
     except Exception as e:
@@ -300,8 +313,8 @@ def initialize_agent():
                 "time",
                 "json",
                 "random",
-                "tempfile",
                 "requests",
             ],
             max_steps=15,
         )
@@ -321,7 +334,12 @@ agent, agent_error = initialize_agent()
 # PIPELINES
 # -------------------------------------------------
-def mcp_agent_pipeline(niche: str, style: str, num_variations: int = 1):
     """
     MAIN PIPELINE: uses smolagents CodeAgent.run to plan & call tools.
@@ -336,7 +354,7 @@ def mcp_agent_pipeline(niche: str, style: str, num_variations: int = 1):
     if agent_error or agent is None:
         base_log.append(f"❌ Agent initialization failed: {agent_error}")
         base_log.append("🔄 Falling back to direct tool pipeline...")
-        status, vids = fallback_pipeline(niche, style, num_variations)
         return "\n".join(base_log + [status]), vids
     try:
@@ -352,20 +370,26 @@ def mcp_agent_pipeline(niche: str, style: str, num_variations: int = 1):
 You are an autonomous Python agent helping creators generate short vertical quote videos.
 Niche: {niche}
-Style: {style}
 Number of variations: {num_variations}
 You have these TOOLS available:
 1. generate_quote_tool(niche: str, style: str) -> str
-   - Returns a single quote as plain text.
 2. search_pexels_video_tool(style: str, niche: str) -> dict
    - Returns a dict with:
      - "success": bool
      - "video_url": str or None
-3. create_quote_video_tool(video_url: str, quote_text: str, output_path: str) -> dict
    - Writes a video file to output_path and returns a dict with:
      - "success": bool
      - "output_path": str or None
@@ -376,8 +400,8 @@ Your job:
 2. For each variation i from 1 to {num_variations}:
    - Call search_pexels_video_tool(style, niche).
    - If it succeeds, compute output_path exactly as:
-     "{base_prefix}{{i}}.mp4"
-   - Call create_quote_video_tool(video_url, quote_text, output_path).
 3. Collect only variations where create_quote_video_tool returns success == True and a non-empty output_path.
 4. Build a human-readable status_log string summarizing:
    - Which tools you called
@@ -433,15 +457,27 @@ CRITICAL:
         return full_status, valid_paths[:3]
     except Exception as e:
-        fallback_status, fallback_videos = fallback_pipeline(niche, style, num_variations)
         combined_status = "\n".join(
             base_log
-            + [f"⚠️ Agent pipeline error: {str(e)}", "", "🔄 Switched to fallback pipeline:", fallback_status]
         )
         return combined_status, fallback_videos
-def fallback_pipeline(niche: str, style: str, num_variations: int = 1):
     """Fallback pipeline: direct tool calls without agent planning."""
     status_log = []
     status_log.append("🔄 **FALLBACK MODE (Direct Tool Execution)**\n")
@@ -484,6 +520,7 @@ def fallback_pipeline(niche: str, style: str, num_variations: int = 1):
             video_url=vr["video_url"],
             quote_text=quote,
             output_path=output_path,
         )
         if creation_result.get("success"):
@@ -523,11 +560,11 @@ with gr.Blocks(
     ### MCP-Powered with Gemini AI
     **Key Features:**
-    - 🌟 **Gemini AI** with quote-history to avoid repetition
-    - 🤖 **smolagents CodeAgent** for planning & tool-use
-    - 🔗 **MCP Client Ready** (uses external MCP tools if available)
-    - 🎥 **Modal** for fast video rendering
-    - 🎨 Generate multiple vertical quote video variations
     """
     )
@@ -590,6 +627,17 @@ with gr.Blocks(
                 value="Cinematic",
             )
             num_variations = gr.Slider(
                 minimum=1,
                 maximum=3,
@@ -619,10 +667,10 @@ with gr.Blocks(
         """
     ---
     ### ✨ Features
-    - 🌟 **Gemini-powered** quote variety (history-aware)
-    - 🎨 Multiple aesthetic video variations
-    - ⚡ **Modal**-accelerated rendering
-    - 🤖 **smolagents** CodeAgent for autonomous tool-calling
     - 🔗 Optional MCP integration via MCPClient
     ### 🏆 Hackathon: MCP 1st Birthday
@@ -632,10 +680,11 @@ with gr.Blocks(
     """
     )
-    def process_and_display(niche, style, num_variations):
         status, videos = mcp_agent_pipeline(
             niche=str(niche),
             style=str(style),
             num_variations=int(num_variations),
         )
@@ -649,7 +698,7 @@ with gr.Blocks(
     generate_btn.click(
         process_and_display,
-        inputs=[niche, style, num_variations],
         outputs=[
             output,
             video1,
@@ -678,3 +727,4 @@ with gr.Blocks(
 if __name__ == "__main__":
     demo.launch(allowed_paths=["/data/gallery_videos"])

 import json
 import shutil
 import random
 import requests
 import gradio as gr
     queries = search_strategies.get(niche, {}).get(style, ["aesthetic nature"])
     try:
+        if not PEXELS_API_KEY:
+            return {
+                "success": False,
+                "video_url": None,
+                "search_query": "",
+                "pexels_url": None,
+                "error": "PEXELS_API_KEY not configured",
+            }
+        headers = {"Authorization": PEXELS_API_KEY}
         query = random.choice(queries)
         url = (
             video_files = video.get("video_files", [])
             portrait_videos = [
+                vf for vf in video_files if vf.get("width", 0) < vf.get("height", 0)
             ]
             if portrait_videos:
 @tool
+def create_quote_video_tool(
+    video_url: str,
+    quote_text: str,
+    output_path: str,
+    text_style: str = "classic_center",
+) -> dict:
     """
     Create a quote video by calling a Modal endpoint that overlays text on a background video.
         video_url: Direct URL of the background video (e.g. from Pexels).
         quote_text: The quote text to be overlaid on the video.
         output_path: Local file path where the resulting video should be saved.
+        text_style: Visual text style/layout (e.g. 'classic_center', 'lower_third_serif', 'typewriter_top').
     Returns:
         A dictionary with:
         }
     try:
+        print(f"🚀 Processing on Modal (fast!) with text_style={text_style}...")
         response = requests.post(
             modal_endpoint,
             json={
                 "video_url": video_url,
                 "quote_text": quote_text,
+                "text_style": text_style,
             },
             timeout=120,
         )
         return {
             "success": True,
             "output_path": output_path,
+            "message": f"Video created via Modal (~{size_mb:.2f}MB, style={text_style}).",
         }
     except Exception as e:
                 "time",
                 "json",
                 "random",
                 "requests",
+                "shutil",
             ],
             max_steps=15,
         )
 # PIPELINES
 # -------------------------------------------------
+def mcp_agent_pipeline(
+    niche: str,
+    style: str,
+    text_style: str = "classic_center",
+    num_variations: int = 1,
+):
     """
     MAIN PIPELINE: uses smolagents CodeAgent.run to plan & call tools.
     if agent_error or agent is None:
         base_log.append(f"❌ Agent initialization failed: {agent_error}")
         base_log.append("🔄 Falling back to direct tool pipeline...")
+        status, vids = fallback_pipeline(niche, style, text_style, num_variations)
         return "\n".join(base_log + [status]), vids
     try:
 You are an autonomous Python agent helping creators generate short vertical quote videos.
 Niche: {niche}
+Visual Style: {style}
+Text style for quotes: {text_style}
 Number of variations: {num_variations}
 You have these TOOLS available:
 1. generate_quote_tool(niche: str, style: str) -> str
+   - Returns a single SHORT quote as plain text.
 2. search_pexels_video_tool(style: str, niche: str) -> dict
    - Returns a dict with:
      - "success": bool
      - "video_url": str or None
+3. create_quote_video_tool(
+       video_url: str,
+       quote_text: str,
+       output_path: str,
+       text_style: str = "classic_center"
+   ) -> dict
    - Writes a video file to output_path and returns a dict with:
      - "success": bool
      - "output_path": str or None
 2. For each variation i from 1 to {num_variations}:
    - Call search_pexels_video_tool(style, niche).
    - If it succeeds, compute output_path exactly as:
+     "{base_prefix}" + str(i) + ".mp4"
+   - Call create_quote_video_tool(video_url, quote_text, output_path, text_style="{text_style}").
 3. Collect only variations where create_quote_video_tool returns success == True and a non-empty output_path.
 4. Build a human-readable status_log string summarizing:
    - Which tools you called
         return full_status, valid_paths[:3]
     except Exception as e:
+        fallback_status, fallback_videos = fallback_pipeline(
+            niche, style, text_style, num_variations
+        )
         combined_status = "\n".join(
             base_log
+            + [
+                f"⚠️ Agent pipeline error: {str(e)}",
+                "",
+                "🔄 Switched to fallback pipeline:",
+                fallback_status,
+            ]
         )
         return combined_status, fallback_videos
+def fallback_pipeline(
+    niche: str,
+    style: str,
+    text_style: str = "classic_center",
+    num_variations: int = 1,
+):
     """Fallback pipeline: direct tool calls without agent planning."""
     status_log = []
     status_log.append("🔄 **FALLBACK MODE (Direct Tool Execution)**\n")
             video_url=vr["video_url"],
             quote_text=quote,
             output_path=output_path,
+            text_style=text_style,
         )
         if creation_result.get("success"):
     ### MCP-Powered with Gemini AI
     **Key Features:**
+    - 🌟 Short, non-repeating Gemini quotes (per niche history)
+    - 🤖 smolagents CodeAgent for tool planning
+    - 🔗 Optional MCP client integration
+    - 🎥 Modal for fast video rendering
+    - 🅰️ Text style controls (font & placement)
     """
     )
                 value="Cinematic",
             )
+            text_style = gr.Dropdown(
+                choices=[
+                    "classic_center",
+                    "lower_third_serif",
+                    "typewriter_top",
+                ],
+                label="🅰️ Text Style",
+                value="classic_center",
+                info="Change font & quote placement on the video",
+            )
             num_variations = gr.Slider(
                 minimum=1,
                 maximum=3,
         """
     ---
     ### ✨ Features
+    - 🌟 Gemini-powered, short non-repeating quotes (per niche)
+    - 🎨 Multiple aesthetic video & text layouts
+    - ⚡ Modal-accelerated rendering
+    - 🤖 smolagents CodeAgent for autonomous tool-calling
     - 🔗 Optional MCP integration via MCPClient
     ### 🏆 Hackathon: MCP 1st Birthday
     """
     )
+    def process_and_display(niche, style, text_style, num_variations):
         status, videos = mcp_agent_pipeline(
             niche=str(niche),
             style=str(style),
+            text_style=str(text_style),
             num_variations=int(num_variations),
         )
     generate_btn.click(
         process_and_display,
+        inputs=[niche, style, text_style, num_variations],
         outputs=[
             output,
             video1,
 if __name__ == "__main__":
     demo.launch(allowed_paths=["/data/gallery_videos"])

modal_video_processing.py CHANGED Viewed

@@ -15,7 +15,7 @@ image = modal.Image.debian_slim(python_version="3.11").pip_install(
     "imageio==2.31.1",
     "imageio-ffmpeg",
     "requests",
-    "fastapi"
 )
@@ -24,15 +24,15 @@ image = modal.Image.debian_slim(python_version="3.11").pip_install(
     cpu=2,
     memory=2048,
     timeout=180,
-    concurrency_limit=10,  # Allow 10 videos at once
-    allow_concurrent_inputs=10,  # Process multiple in parallel
     container_idle_timeout=120,
 )
 def process_quote_video(
     video_url: str,
     quote_text: str,
     audio_b64: str = None,
-    text_style: str = "classic_center"
 ) -> bytes:
     """
     Process quote video on Modal - FAST version.
@@ -59,7 +59,7 @@ def process_quote_video(
     # Load video
     video = VideoFileClip(temp_video.name)
-    # Optional: trim to first 10s to keep things snappy
     if video.duration > 10:
         video = video.subclip(0, 10)
@@ -67,17 +67,15 @@ def process_quote_video(
     # Choose layout + font behavior based on text_style
     # Supported:
-    # - "classic_center"        → centered, sans serif (default)
-    # - "lower_third_serif"     → bottom, serif
-    # - "typewriter_top"        → top, monospace vibe
     def make_text_frame(t):
         img = Image.new("RGBA", (w, h), (0, 0, 0, 0))
         draw = ImageDraw.Draw(img)
-        # Base font size
         base_font_size = int(h * 0.03)
-        # Defaults
         font_paths = []
         y_mode = "center"
         font_size = base_font_size
@@ -96,8 +94,7 @@ def process_quote_video(
             ]
             y_mode = "top"
             font_size = int(h * 0.028)
-        else:
-            # classic_center
             font_paths = [
                 "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
                 "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
@@ -200,7 +197,9 @@ def process_quote_video(
     os.unlink(output_file.name)
     total_time = time.time() - start_time
-    print(f"🎉 Total: {total_time:.1f}s, Size: {len(video_bytes)/1024/1024:.2f}MB, Style: {text_style}")
     return video_bytes
@@ -218,7 +217,9 @@ def process_video_endpoint(data: dict):
         return {"error": "Missing video_url or quote_text"}, 400
     try:
-        video_bytes = process_quote_video.remote(video_url, quote_text, audio_b64, text_style)
         import base64
@@ -281,4 +282,4 @@ def process_batch_endpoint(data: dict):
         }
     except Exception as e:
-        return {"error": str(e)}, 500

     "imageio==2.31.1",
     "imageio-ffmpeg",
     "requests",
+    "fastapi",
 )
     cpu=2,
     memory=2048,
     timeout=180,
+    concurrency_limit=10,
+    allow_concurrent_inputs=10,
     container_idle_timeout=120,
 )
 def process_quote_video(
     video_url: str,
     quote_text: str,
     audio_b64: str = None,
+    text_style: str = "classic_center",
 ) -> bytes:
     """
     Process quote video on Modal - FAST version.
     # Load video
     video = VideoFileClip(temp_video.name)
+    # Trim to first 10 seconds
     if video.duration > 10:
         video = video.subclip(0, 10)
     # Choose layout + font behavior based on text_style
     # Supported:
+    # - "classic_center"      → centered, sans serif (default)
+    # - "lower_third_serif"   → bottom, serif
+    # - "typewriter_top"      → top, monospace-ish
     def make_text_frame(t):
         img = Image.new("RGBA", (w, h), (0, 0, 0, 0))
         draw = ImageDraw.Draw(img)
         base_font_size = int(h * 0.03)
         font_paths = []
         y_mode = "center"
         font_size = base_font_size
             ]
             y_mode = "top"
             font_size = int(h * 0.028)
+        else:  # classic_center
             font_paths = [
                 "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
                 "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
     os.unlink(output_file.name)
     total_time = time.time() - start_time
+    print(
+        f"🎉 Total: {total_time:.1f}s, Size: {len(video_bytes)/1024/1024:.2f}MB, Style: {text_style}"
+    )
     return video_bytes
         return {"error": "Missing video_url or quote_text"}, 400
     try:
+        video_bytes = process_quote_video.remote(
+            video_url, quote_text, audio_b64, text_style
+        )
         import base64
         }
     except Exception as e:
+        return {"error": str(e)}, 500

quote_generator_gemini.py CHANGED Viewed

@@ -6,7 +6,7 @@ import google.generativeai as genai
 import os
 import json
 import time
-from typing import List, Optional
 class QuoteGenerator:

 import os
 import json
 import time
+from typing import Optional
 class QuoteGenerator: