ladybug11 commited on
Commit
aa1565f
Β·
1 Parent(s): c3ca514
Files changed (1) hide show
  1. app.py +158 -363
app.py CHANGED
@@ -3,12 +3,15 @@ import os
3
  import requests
4
  import random
5
  import tempfile
 
 
 
 
6
  from openai import OpenAI
7
- from smolagents import CodeAgent, MCPClient, tool
8
- from huggingface_hub import InferenceClient
9
  from moviepy.editor import VideoFileClip, ImageClip, CompositeVideoClip, AudioFileClip
10
  from PIL import Image, ImageDraw, ImageFont
11
- import textwrap
12
  import numpy as np
13
  from elevenlabs import ElevenLabs, VoiceSettings
14
 
@@ -34,7 +37,10 @@ except Exception as e:
34
  print(f"MCP initialization warning: {e}")
35
  mcp_enabled = False
36
 
37
- # Define custom tools for the MCP agent
 
 
 
38
  @tool
39
  def generate_quote_tool(niche: str, style: str) -> str:
40
  """
@@ -48,7 +54,6 @@ def generate_quote_tool(niche: str, style: str) -> str:
48
  Returns:
49
  A powerful, unique quote string
50
  """
51
-
52
  try:
53
  result = hybrid_quote_generator.generate_quote(niche, style, prefer_gemini=True)
54
 
@@ -71,6 +76,7 @@ def generate_quote_tool(niche: str, style: str) -> str:
71
  except Exception as e:
72
  return f"Error generating quote: {str(e)}"
73
 
 
74
  @tool
75
  def search_pexels_video_tool(style: str, niche: str) -> dict:
76
  """
@@ -193,24 +199,15 @@ def search_pexels_video_tool(style: str, niche: str) -> dict:
193
  "error": str(e)
194
  }
195
 
 
196
  @tool
197
  def generate_voice_commentary_tool(quote_text: str, niche: str, output_path: str) -> dict:
198
  """
199
  Generate insightful voice commentary explaining the deeper meaning of the quote.
200
  Uses Gemini to create thoughtful explanation, then ElevenLabs to voice it.
201
  This adds VALUE - not just reading what's already on screen.
202
-
203
- Args:
204
- quote_text: The quote to explain
205
- niche: The niche/category for context
206
- output_path: Path where to save the audio file
207
-
208
- Returns:
209
- Dictionary with success status, output path, and the explanation text
210
  """
211
-
212
  try:
213
- # Step 1: Generate explanation using Gemini
214
  import google.generativeai as genai
215
 
216
  explanation_prompt = f"""Given this {niche} quote:
@@ -227,14 +224,9 @@ Requirements:
227
  - Make it thought-provoking
228
  - Don't start with "This quote..." - dive into the insight
229
 
230
- Example:
231
- Quote: "Between stimulus and response there is a space."
232
- Good: "In that pause lies your freedom. That's where you choose who you become, not who your habits make you."
233
-
234
  Return ONLY the commentary, nothing else."""
235
-
236
  genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
237
- model = genai.GenerativeModel('gemini-1.5-flash') # Updated model name
238
 
239
  response = model.generate_content(
240
  explanation_prompt,
@@ -247,20 +239,18 @@ Return ONLY the commentary, nothing else."""
247
  explanation = response.text.strip().strip('"').strip("'")
248
  print(f"πŸ“ Commentary: {explanation}")
249
 
250
- # Step 2: Generate voice using ElevenLabs
251
  audio = elevenlabs_client.text_to_speech.convert(
252
  text=explanation,
253
- voice_id="pNInz6obpgDQGcFmaJgB", # Adam - thoughtful and clear
254
  model_id="eleven_multilingual_v2",
255
  voice_settings=VoiceSettings(
256
- stability=0.6, # Stable for narration
257
  similarity_boost=0.8,
258
- style=0.6, # Expressive for commentary
259
  use_speaker_boost=True
260
  )
261
  )
262
 
263
- # Save audio
264
  with open(output_path, 'wb') as f:
265
  for chunk in audio:
266
  f.write(chunk)
@@ -280,34 +270,22 @@ Return ONLY the commentary, nothing else."""
280
  "message": f"Error creating commentary: {str(e)}"
281
  }
282
 
 
283
  @tool
284
  def create_quote_video_tool(video_url: str, quote_text: str, output_path: str, audio_path: str = None) -> dict:
285
  """
286
  Create a final quote video by overlaying text on the background video.
287
  Uses Modal for fast processing (4-8x faster) with local fallback.
288
  Optionally adds voice narration audio.
289
-
290
- Args:
291
- video_url: URL of the background video from Pexels
292
- quote_text: The quote text to overlay
293
- output_path: Path where to save the final video
294
- audio_path: Optional path to audio file for voice narration
295
-
296
- Returns:
297
- Dictionary with success status and output path
298
  """
299
-
300
- # Check if Modal is configured
301
  modal_endpoint = os.getenv("MODAL_ENDPOINT_URL")
302
 
303
  if modal_endpoint:
304
  try:
305
- import requests
306
  import base64
307
 
308
  print("πŸš€ Processing on Modal (fast!)...")
309
 
310
- # Prepare audio data if present
311
  audio_b64 = None
312
  if audio_path and os.path.exists(audio_path):
313
  with open(audio_path, 'rb') as f:
@@ -315,26 +293,23 @@ def create_quote_video_tool(video_url: str, quote_text: str, output_path: str, a
315
  audio_b64 = base64.b64encode(audio_bytes).decode()
316
  print(f" 🎀 Including voice commentary audio ({len(audio_bytes)} bytes)")
317
 
318
- # Call Modal endpoint with longer timeout
319
  response = requests.post(
320
  modal_endpoint,
321
  json={
322
  "video_url": video_url,
323
  "quote_text": quote_text,
324
- "audio_b64": audio_b64 # Pass audio as base64
325
  },
326
- timeout=120 # 2 minute timeout
327
  )
328
 
329
  if response.status_code == 200:
330
  result = response.json()
331
 
332
  if result.get("success"):
333
- # Decode video bytes
334
  video_b64 = result["video"]
335
  video_bytes = base64.b64decode(video_b64)
336
 
337
- # Save to output path
338
  with open(output_path, 'wb') as f:
339
  f.write(video_bytes)
340
 
@@ -350,7 +325,6 @@ def create_quote_video_tool(video_url: str, quote_text: str, output_path: str, a
350
  else:
351
  print(f"⚠️ Modal HTTP error: {response.status_code}")
352
 
353
- # If Modal failed, fall through to local processing
354
  print("⚠️ Modal failed, falling back to local processing...")
355
 
356
  except requests.Timeout:
@@ -360,190 +334,41 @@ def create_quote_video_tool(video_url: str, quote_text: str, output_path: str, a
360
  else:
361
  print("ℹ️ MODAL_ENDPOINT_URL not configured, using local processing")
362
 
363
- # LOCAL PROCESSING - Skip if taking too long
364
- print("πŸ”§ Processing locally (may be slow)...")
365
- print("⚠️ WARNING: Local processing can hang on HF Spaces!")
366
- print("⚠️ Consider setting up Modal for 4-8x faster processing")
367
-
368
- # Return error instead of hanging
369
  return {
370
  "success": False,
371
  "output_path": None,
372
  "message": "Local processing disabled - please configure Modal for video generation. Deploy Modal with: modal deploy modal_video_processing.py"
373
  }
374
-
375
- # LOCAL PROCESSING (Fallback or if Modal not configured)
376
- print("πŸ”§ Processing locally...")
377
-
378
- try:
379
- import time
380
- processing_start = time.time()
381
-
382
- # Step 1: Download the video
383
- response = requests.get(video_url, stream=True, timeout=30)
384
- response.raise_for_status()
385
-
386
- # Create temporary file for downloaded video
387
- temp_video = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4')
388
-
389
- with open(temp_video.name, 'wb') as f:
390
- for chunk in response.iter_content(chunk_size=8192):
391
- f.write(chunk)
392
-
393
- # Step 2: Load video with MoviePy
394
- video = VideoFileClip(temp_video.name)
395
-
396
- # Get video dimensions
397
- w, h = video.size
398
-
399
- # Step 3: Create text overlay using PIL
400
- def make_text_frame(t):
401
- """Generate a text frame using PIL"""
402
- # Create transparent image
403
- img = Image.new('RGBA', (w, h), (0, 0, 0, 0))
404
- draw = ImageDraw.Draw(img)
405
-
406
- # Calculate font size (2.5% of video height - smaller for better aesthetic)
407
- font_size = int(h * 0.025)
408
-
409
- # Try to load a font, fall back to default if needed
410
- try:
411
- # Try common fonts available on Linux
412
- font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf", font_size)
413
- except:
414
- try:
415
- font = ImageFont.truetype("/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf", font_size)
416
- except:
417
- # Fall back to default font
418
- font = ImageFont.load_default()
419
-
420
- # Wrap text to fit width (60% of video width for better proportions)
421
- max_width = int(w * 0.6)
422
-
423
- # Manual text wrapping with better line length
424
- words = quote_text.split()
425
- lines = []
426
- current_line = []
427
-
428
- for word in words:
429
- test_line = ' '.join(current_line + [word])
430
- # Get text bbox to check width
431
- bbox = draw.textbbox((0, 0), test_line, font=font)
432
- text_width = bbox[2] - bbox[0]
433
-
434
- if text_width <= max_width:
435
- current_line.append(word)
436
- else:
437
- if current_line:
438
- lines.append(' '.join(current_line))
439
- current_line = [word]
440
- else:
441
- lines.append(word)
442
-
443
- if current_line:
444
- lines.append(' '.join(current_line))
445
-
446
- # Calculate total text height with better line spacing
447
- line_spacing = int(font_size * 0.4)
448
- text_block_height = len(lines) * (font_size + line_spacing)
449
-
450
- # Start y position (centered vertically)
451
- y = (h - text_block_height) // 2
452
-
453
- # Draw each line centered
454
- for line in lines:
455
- # Get text size
456
- bbox = draw.textbbox((0, 0), line, font=font)
457
- text_width = bbox[2] - bbox[0]
458
-
459
- # Center horizontally
460
- x = (w - text_width) // 2
461
-
462
- # Draw black outline (stroke) - thinner for smaller text
463
- outline_width = max(2, int(font_size * 0.08))
464
- for adj_x in range(-outline_width, outline_width + 1):
465
- for adj_y in range(-outline_width, outline_width + 1):
466
- draw.text((x + adj_x, y + adj_y), line, font=font, fill='black')
467
-
468
- # Draw white text on top
469
- draw.text((x, y), line, font=font, fill='white')
470
-
471
- y += font_size + line_spacing
472
-
473
- return np.array(img)
474
-
475
- # Step 4: Create text clip from function
476
- text_clip = ImageClip(make_text_frame(0), duration=video.duration)
477
-
478
- # Step 5: Composite video with text
479
- final_video = CompositeVideoClip([video, text_clip])
480
-
481
- # Step 5.5: Add voice narration if provided
482
- if audio_path and os.path.exists(audio_path):
483
- try:
484
- print("🎀 Adding audio track...")
485
- audio_clip = AudioFileClip(audio_path)
486
- # Use the shorter duration between video and audio
487
- audio_duration = min(audio_clip.duration, final_video.duration)
488
- audio_clip = audio_clip.subclip(0, audio_duration)
489
- final_video = final_video.set_audio(audio_clip)
490
- print("βœ… Audio added successfully")
491
- except Exception as audio_error:
492
- print(f"⚠️ Could not add audio: {audio_error}")
493
- print("⚠️ Continuing without audio...")
494
- # Continue without audio rather than failing
495
-
496
- # Step 6: Export final video
497
- print("πŸ“¦ Exporting video (this may take 30-60s)...")
498
- final_video.write_videofile(
499
- output_path,
500
- codec='libx264',
501
- audio_codec='aac',
502
- temp_audiofile='temp-audio.m4a',
503
- remove_temp=True,
504
- fps=24,
505
- preset='ultrafast', # Faster encoding
506
- threads=4,
507
- logger=None, # Suppress verbose output
508
- verbose=False
509
- )
510
-
511
- print(f"βœ… Video export complete! ({time.time() - processing_start:.1f}s total)")
512
-
513
- # Cleanup
514
- video.close()
515
- final_video.close()
516
- os.unlink(temp_video.name)
517
-
518
- return {
519
- "success": True,
520
- "output_path": output_path,
521
- "message": "Video created successfully!"
522
- }
523
-
524
- except Exception as e:
525
- return {
526
- "success": False,
527
- "output_path": None,
528
- "message": f"Error creating video: {str(e)}"
529
- }
530
 
531
- # Initialize the MCP-powered agent
 
 
 
532
  def initialize_agent():
533
  """Initialize the CodeAgent with MCP capabilities"""
534
  try:
535
- # Use Hugging Face Inference API for the agent's LLM
536
- model = InferenceClient(token=os.getenv("HF_TOKEN"))
 
 
537
 
538
- # Create agent with custom tools
539
  agent = CodeAgent(
540
  tools=[generate_quote_tool, search_pexels_video_tool, generate_voice_commentary_tool, create_quote_video_tool],
541
  model=model,
542
- additional_authorized_imports=["requests", "openai", "random", "tempfile", "os", "google.generativeai"],
 
 
 
 
 
 
 
 
543
  max_steps=15
544
  )
545
 
546
- # Add MCP client if available
547
  if mcp_enabled:
548
  agent.mcp_clients = [mcp_client]
549
 
@@ -551,154 +376,130 @@ def initialize_agent():
551
  except Exception as e:
552
  return None, f"Agent initialization error: {str(e)}"
553
 
554
- # Initialize agent
555
  agent, agent_error = initialize_agent()
556
 
 
 
 
 
557
  def mcp_agent_pipeline(niche, style, num_variations=1):
558
  """
559
- MCP-POWERED AUTONOMOUS AGENT PIPELINE
560
- Uses smolagents with proper MCP server integration
561
- Generates multiple video variations with Gemini-powered quotes
 
 
 
562
  """
 
563
 
564
- status_log = []
565
- status_log.append("πŸ€– **MCP AGENT STARTING**\n")
566
-
567
- if agent_error:
568
- status_log.append(f"❌ Agent initialization failed: {agent_error}")
569
- status_log.append("\nπŸ”„ Falling back to direct tool execution...\n")
570
- return fallback_pipeline(niche, style, num_variations)
571
 
572
  try:
573
- # STEP 1: Agent receives task
574
- status_log.append("πŸ“‹ **TASK RECEIVED:**")
575
- status_log.append(f" β†’ Generate {niche} quote with {style} aesthetic")
576
- status_log.append(f" β†’ Create {num_variations} video variations")
577
- status_log.append("")
578
-
579
- # STEP 2: Agent executes quote generation with Gemini
580
- status_log.append("🧠 **GEMINI AI: generate_quote_tool**")
581
- quote = generate_quote_tool(niche, style)
582
-
583
- if "Error" in quote:
584
- return "\n".join(status_log) + f"\n❌ Failed: {quote}", []
585
-
586
- status_log.append(f" βœ… Generated: \"{quote[:100]}...\"" if len(quote) > 100 else f" βœ… Generated: \"{quote}\"\n")
587
-
588
- # STEP 3: Search for multiple videos
589
- status_log.append(f"πŸ” **MCP TOOL: search_pexels_video_tool (x{num_variations})**")
590
- status_log.append(f" ⏳ Finding {num_variations} different videos...")
591
-
592
- video_results = []
593
- for i in range(num_variations):
594
- video_result = search_pexels_video_tool(style, niche)
595
- if video_result["success"]:
596
- video_results.append(video_result)
597
- status_log.append(f" βœ… Video {i+1}: {video_result['search_query']}")
598
-
599
- if not video_results:
600
- return "\n".join(status_log) + "\n❌ No videos found", []
601
-
602
- status_log.append("")
603
-
604
- # STEP 4: Create multiple video variations
605
- status_log.append(f"🎬 **MCP TOOL: create_quote_video_tool (x{len(video_results)})**")
606
- status_log.append(f" ⏳ Creating {len(video_results)} video variations in parallel...")
607
-
608
  output_dir = "/tmp/quote_videos"
609
  gallery_dir = "/data/gallery_videos"
610
  os.makedirs(output_dir, exist_ok=True)
611
  os.makedirs(gallery_dir, exist_ok=True)
612
 
613
- import time
614
  timestamp = int(time.time())
 
615
 
616
- # Use threading for parallel Modal calls
617
- import threading
618
- import queue
619
-
620
- results_queue = queue.Queue()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
621
 
622
- def create_single_video(index, video_result):
623
- output_filename = f"quote_video_v{index+1}_{timestamp}.mp4"
624
- output_path = os.path.join(output_dir, output_filename)
625
-
626
- creation_result = create_quote_video_tool(
627
- video_result["video_url"],
628
- quote,
629
- output_path,
630
- None
631
- )
632
-
633
- results_queue.put((index, creation_result, output_path))
634
-
635
- # Start all threads
636
- threads = []
637
- for i, video_result in enumerate(video_results):
638
- thread = threading.Thread(target=create_single_video, args=(i, video_result))
639
- thread.start()
640
- threads.append(thread)
641
-
642
- # Wait for all to complete
643
- for thread in threads:
644
- thread.join()
645
-
646
- # Collect results
647
- created_videos = []
648
- all_results = []
649
- while not results_queue.empty():
650
- all_results.append(results_queue.get())
651
-
652
- # Sort by index
653
- all_results.sort(key=lambda x: x[0])
654
-
655
- # Process results
656
- for index, creation_result, output_path in all_results:
657
- if creation_result["success"]:
658
- created_videos.append(output_path)
659
- status_log.append(f" βœ… Variation {index+1} created!")
660
-
661
- # Copy to gallery
662
- import shutil
663
- gallery_filename = f"gallery_{timestamp}_v{index+1}.mp4"
664
- gallery_path = os.path.join(gallery_dir, gallery_filename)
665
- try:
666
- shutil.copy2(output_path, gallery_path)
667
- except:
668
- pass
669
- else:
670
- error_msg = creation_result.get("message", "Unknown error")
671
- status_log.append(f" ⚠️ Variation {i+1} failed: {error_msg}")
672
 
673
- if not created_videos:
674
- status_log.append("\n❌ All video creations failed")
675
- return "\n".join(status_log), []
676
 
677
- status_log.append("")
 
 
 
 
678
 
679
- # STEP 5: Integration status
680
- status_log.append("πŸ”— **AI INTEGRATIONS:**")
681
- status_log.append(" βœ… Gemini API - Quote generation with variety tracking")
682
- status_log.append(" βœ… Pexels API - Video search")
683
- status_log.append(" βœ… Modal Compute - Fast video processing")
684
- if mcp_enabled:
685
- status_log.append(" βœ… MCP Server - abidlabs-mcp-tools.hf.space")
686
- status_log.append("")
687
 
688
- # STEP 6: Success!
689
- status_log.append("✨ **PIPELINE COMPLETE!**")
690
- status_log.append(f" 🎬 Created {len(created_videos)} unique video variations")
691
- status_log.append(f" πŸ“₯ Choose your favorite and download!")
 
 
 
 
692
 
693
- final_status = "\n".join(status_log)
694
- return final_status, created_videos
695
 
696
  except Exception as e:
697
- status_log.append(f"\n❌ Pipeline error: {str(e)}")
698
- return "\n".join(status_log), []
 
 
 
 
 
 
699
 
700
  def fallback_pipeline(niche, style, num_variations=1):
701
- """Fallback pipeline if MCP agent fails"""
702
  status_log = []
703
  status_log.append("πŸ”„ **FALLBACK MODE (Direct Tool Execution)**\n")
704
 
@@ -709,7 +510,7 @@ def fallback_pipeline(niche, style, num_variations=1):
709
  if "Error" in quote:
710
  return "\n".join(status_log) + f"\n❌ {quote}", []
711
 
712
- status_log.append(f" βœ… Quote generated\n")
713
 
714
  # Search videos
715
  status_log.append(f"πŸ” Searching for {num_variations} videos...")
@@ -727,11 +528,10 @@ def fallback_pipeline(niche, style, num_variations=1):
727
  # Create videos
728
  status_log.append("🎬 Creating videos...")
729
  output_dir = "/tmp/quote_videos"
730
- gallery_dir = "/data/gallery_videos" # HF persistent storage
731
  os.makedirs(output_dir, exist_ok=True)
732
  os.makedirs(gallery_dir, exist_ok=True)
733
 
734
- import time
735
  timestamp = int(time.time())
736
  created_videos = []
737
 
@@ -743,20 +543,18 @@ def fallback_pipeline(niche, style, num_variations=1):
743
  video_result["video_url"],
744
  quote,
745
  output_path,
746
- None # No audio
747
  )
748
 
749
  if creation_result["success"]:
750
  created_videos.append(creation_result["output_path"])
751
 
752
- # Copy to gallery
753
- import shutil
754
  gallery_filename = f"gallery_{timestamp}_v{i+1}.mp4"
755
  gallery_path = os.path.join(gallery_dir, gallery_filename)
756
  try:
757
  shutil.copy2(creation_result["output_path"], gallery_path)
758
- except:
759
- pass
760
  else:
761
  error_msg = creation_result.get("message", "Unknown error")
762
  status_log.append(f" ❌ Video {i+1} error: {error_msg}")
@@ -769,7 +567,10 @@ def fallback_pipeline(niche, style, num_variations=1):
769
 
770
  return "\n".join(status_log), created_videos
771
 
772
- # Gradio Interface
 
 
 
773
  with gr.Blocks(title="AIQuoteClipGenerator - MCP + Gemini Edition", theme=gr.themes.Soft()) as demo:
774
  gr.Markdown("""
775
  # 🎬 AIQuoteClipGenerator
@@ -777,40 +578,38 @@ with gr.Blocks(title="AIQuoteClipGenerator - MCP + Gemini Edition", theme=gr.the
777
 
778
  **Key Features:**
779
  - 🌟 **Gemini AI:** No more repetitive quotes! Smart variety tracking
780
- - πŸ”— **MCP Server:** smolagents framework integration
781
- - πŸ› οΈ **4 Custom MCP Tools:** Quote + Video search + Video creation
782
- - πŸ€– **Agent Reasoning:** Autonomous task execution
783
  - ⚑ **Modal Processing:** 4-8x faster video creation
784
  - 🎨 **Multiple Variations:** Get different video styles
785
-
786
  """)
787
 
788
- # Example Gallery - Instagram-style grid
789
  with gr.Accordion("πŸ“Έ Example Gallery - Recent Videos", open=True):
790
  gr.Markdown("See what others have created! Updates automatically after generation.")
791
 
792
- # First row - 3 videos
793
  with gr.Row():
794
  gallery_video1 = gr.Video(label="", height=300, show_label=False, interactive=False)
795
  gallery_video2 = gr.Video(label="", height=300, show_label=False, interactive=False)
796
  gallery_video3 = gr.Video(label="", height=300, show_label=False, interactive=False)
797
 
798
- # Second row - 3 videos
799
  with gr.Row():
800
  gallery_video4 = gr.Video(label="", height=300, show_label=False, interactive=False)
801
  gallery_video5 = gr.Video(label="", height=300, show_label=False, interactive=False)
802
  gallery_video6 = gr.Video(label="", height=300, show_label=False, interactive=False)
803
 
804
- # Function to load gallery videos
805
  def load_gallery_videos():
806
  gallery_output_dir = "/data/gallery_videos"
807
  os.makedirs(gallery_output_dir, exist_ok=True)
808
 
809
  import glob
810
- existing_videos = sorted(glob.glob(f"{gallery_output_dir}/*.mp4"),
811
- key=os.path.getmtime, reverse=True)[:6]
 
 
 
812
 
813
- # Return 6 videos (None for empty slots)
814
  videos = [None] * 6
815
  for i, video_path in enumerate(existing_videos):
816
  if i < 6:
@@ -879,24 +678,22 @@ with gr.Blocks(title="AIQuoteClipGenerator - MCP + Gemini Edition", theme=gr.the
879
  - 🌟 **Gemini AI** - Eliminates repetitive quotes with smart history tracking
880
  - 🎨 **Multiple Variations** - Get 1-3 different videos to choose from
881
  - ⚑ **Modal Processing** - 4-8x faster with serverless compute
882
- - 🎯 **4 MCP Tools** - Quote (Gemini), Video Search, Voice, Video Creation
 
883
 
884
  ### πŸ† Hackathon: MCP 1st Birthday
885
  **Track:** Track 2 - MCP in Action
886
  **Category:** Productivity Tools
887
  **Built with:** Gradio + smolagents + Gemini + OpenAI + Pexels + Modal + ElevenLabs + MCP
888
-
889
  """)
890
 
891
  def process_and_display(niche, style, num_variations):
892
- status, videos = mcp_agent_pipeline(niche, style, num_variations)
893
 
894
- # Return up to 3 videos, None for unused slots
895
  v1 = videos[0] if len(videos) > 0 else None
896
  v2 = videos[1] if len(videos) > 1 else None
897
  v3 = videos[2] if len(videos) > 2 else None
898
 
899
- # Load updated gallery (6 videos)
900
  gallery_vids = load_gallery_videos()
901
 
902
  return [status, v1, v2, v3] + gallery_vids
@@ -911,7 +708,6 @@ with gr.Blocks(title="AIQuoteClipGenerator - MCP + Gemini Edition", theme=gr.the
911
  ]
912
  )
913
 
914
- # Load gallery on page load
915
  demo.load(
916
  load_gallery_videos,
917
  outputs=[
@@ -922,4 +718,3 @@ with gr.Blocks(title="AIQuoteClipGenerator - MCP + Gemini Edition", theme=gr.the
922
 
923
  if __name__ == "__main__":
924
  demo.launch(allowed_paths=["/data/gallery_videos"])
925
-
 
3
  import requests
4
  import random
5
  import tempfile
6
+ import json
7
+ import time
8
+ import shutil
9
+
10
  from openai import OpenAI
11
+ from smolagents import CodeAgent, MCPClient, tool, HfApiModel
12
+ from huggingface_hub import InferenceClient # still imported if you need it elsewhere
13
  from moviepy.editor import VideoFileClip, ImageClip, CompositeVideoClip, AudioFileClip
14
  from PIL import Image, ImageDraw, ImageFont
 
15
  import numpy as np
16
  from elevenlabs import ElevenLabs, VoiceSettings
17
 
 
37
  print(f"MCP initialization warning: {e}")
38
  mcp_enabled = False
39
 
40
+ # -----------------------
41
+ # TOOLS
42
+ # -----------------------
43
+
44
  @tool
45
  def generate_quote_tool(niche: str, style: str) -> str:
46
  """
 
54
  Returns:
55
  A powerful, unique quote string
56
  """
 
57
  try:
58
  result = hybrid_quote_generator.generate_quote(niche, style, prefer_gemini=True)
59
 
 
76
  except Exception as e:
77
  return f"Error generating quote: {str(e)}"
78
 
79
+
80
  @tool
81
  def search_pexels_video_tool(style: str, niche: str) -> dict:
82
  """
 
199
  "error": str(e)
200
  }
201
 
202
+
203
  @tool
204
  def generate_voice_commentary_tool(quote_text: str, niche: str, output_path: str) -> dict:
205
  """
206
  Generate insightful voice commentary explaining the deeper meaning of the quote.
207
  Uses Gemini to create thoughtful explanation, then ElevenLabs to voice it.
208
  This adds VALUE - not just reading what's already on screen.
 
 
 
 
 
 
 
 
209
  """
 
210
  try:
 
211
  import google.generativeai as genai
212
 
213
  explanation_prompt = f"""Given this {niche} quote:
 
224
  - Make it thought-provoking
225
  - Don't start with "This quote..." - dive into the insight
226
 
 
 
 
 
227
  Return ONLY the commentary, nothing else."""
 
228
  genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
229
+ model = genai.GenerativeModel('gemini-1.5-flash')
230
 
231
  response = model.generate_content(
232
  explanation_prompt,
 
239
  explanation = response.text.strip().strip('"').strip("'")
240
  print(f"πŸ“ Commentary: {explanation}")
241
 
 
242
  audio = elevenlabs_client.text_to_speech.convert(
243
  text=explanation,
244
+ voice_id="pNInz6obpgDQGcFmaJgB",
245
  model_id="eleven_multilingual_v2",
246
  voice_settings=VoiceSettings(
247
+ stability=0.6,
248
  similarity_boost=0.8,
249
+ style=0.6,
250
  use_speaker_boost=True
251
  )
252
  )
253
 
 
254
  with open(output_path, 'wb') as f:
255
  for chunk in audio:
256
  f.write(chunk)
 
270
  "message": f"Error creating commentary: {str(e)}"
271
  }
272
 
273
+
274
  @tool
275
  def create_quote_video_tool(video_url: str, quote_text: str, output_path: str, audio_path: str = None) -> dict:
276
  """
277
  Create a final quote video by overlaying text on the background video.
278
  Uses Modal for fast processing (4-8x faster) with local fallback.
279
  Optionally adds voice narration audio.
 
 
 
 
 
 
 
 
 
280
  """
 
 
281
  modal_endpoint = os.getenv("MODAL_ENDPOINT_URL")
282
 
283
  if modal_endpoint:
284
  try:
 
285
  import base64
286
 
287
  print("πŸš€ Processing on Modal (fast!)...")
288
 
 
289
  audio_b64 = None
290
  if audio_path and os.path.exists(audio_path):
291
  with open(audio_path, 'rb') as f:
 
293
  audio_b64 = base64.b64encode(audio_bytes).decode()
294
  print(f" 🎀 Including voice commentary audio ({len(audio_bytes)} bytes)")
295
 
 
296
  response = requests.post(
297
  modal_endpoint,
298
  json={
299
  "video_url": video_url,
300
  "quote_text": quote_text,
301
+ "audio_b64": audio_b64
302
  },
303
+ timeout=120
304
  )
305
 
306
  if response.status_code == 200:
307
  result = response.json()
308
 
309
  if result.get("success"):
 
310
  video_b64 = result["video"]
311
  video_bytes = base64.b64decode(video_b64)
312
 
 
313
  with open(output_path, 'wb') as f:
314
  f.write(video_bytes)
315
 
 
325
  else:
326
  print(f"⚠️ Modal HTTP error: {response.status_code}")
327
 
 
328
  print("⚠️ Modal failed, falling back to local processing...")
329
 
330
  except requests.Timeout:
 
334
  else:
335
  print("ℹ️ MODAL_ENDPOINT_URL not configured, using local processing")
336
 
337
+ # For hackathon deploy: avoid heavy local MoviePy on Spaces to prevent hangs
338
+ print("πŸ”§ Local processing disabled on this deployment.")
 
 
 
 
339
  return {
340
  "success": False,
341
  "output_path": None,
342
  "message": "Local processing disabled - please configure Modal for video generation. Deploy Modal with: modal deploy modal_video_processing.py"
343
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
344
 
345
+ # -----------------------
346
+ # AGENT INITIALIZATION
347
+ # -----------------------
348
+
349
  def initialize_agent():
350
  """Initialize the CodeAgent with MCP capabilities"""
351
  try:
352
+ # Use Hugging Face API model via HfApiModel
353
+ hf_token = os.getenv("HF_TOKEN")
354
+ model_id = os.getenv("HF_MODEL_ID", "meta-llama/Llama-3.3-70B-Instruct")
355
+ model = HfApiModel(model_id=model_id, token=hf_token)
356
 
 
357
  agent = CodeAgent(
358
  tools=[generate_quote_tool, search_pexels_video_tool, generate_voice_commentary_tool, create_quote_video_tool],
359
  model=model,
360
+ additional_authorized_imports=[
361
+ "requests",
362
+ "openai",
363
+ "random",
364
+ "tempfile",
365
+ "os",
366
+ "google.generativeai",
367
+ "json"
368
+ ],
369
  max_steps=15
370
  )
371
 
 
372
  if mcp_enabled:
373
  agent.mcp_clients = [mcp_client]
374
 
 
376
  except Exception as e:
377
  return None, f"Agent initialization error: {str(e)}"
378
 
 
379
  agent, agent_error = initialize_agent()
380
 
381
+ # -----------------------
382
+ # PIPELINES
383
+ # -----------------------
384
+
385
  def mcp_agent_pipeline(niche, style, num_variations=1):
386
  """
387
+ MAIN PIPELINE: uses smolagents CodeAgent.run to plan & call tools.
388
+ The agent:
389
+ - calls generate_quote_tool
390
+ - calls search_pexels_video_tool multiple times
391
+ - calls create_quote_video_tool for each variation
392
+ - returns JSON with status_log + video_paths
393
  """
394
+ base_log = ["πŸ€– **MCP AGENT RUN**"]
395
 
396
+ if agent_error or agent is None:
397
+ base_log.append(f"❌ Agent initialization failed: {agent_error}")
398
+ base_log.append("πŸ”„ Falling back to direct tool pipeline...")
399
+ status, vids = fallback_pipeline(niche, style, num_variations)
400
+ return "\n".join(base_log + [status]), vids
 
 
401
 
402
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
403
  output_dir = "/tmp/quote_videos"
404
  gallery_dir = "/data/gallery_videos"
405
  os.makedirs(output_dir, exist_ok=True)
406
  os.makedirs(gallery_dir, exist_ok=True)
407
 
 
408
  timestamp = int(time.time())
409
+ base_prefix = f"{output_dir}/agent_{timestamp}_v"
410
 
411
+ user_task = f"""
412
+ You are an autonomous Python agent helping creators generate short vertical quote videos.
413
+
414
+ Niche: {niche}
415
+ Style: {style}
416
+ Number of variations: {num_variations}
417
+
418
+ You have these TOOLS already available in this environment:
419
+
420
+ 1. generate_quote_tool(niche: str, style: str) -> str
421
+ - Returns a unique quote as plain text.
422
+
423
+ 2. search_pexels_video_tool(style: str, niche: str) -> dict
424
+ - Returns a dict with at least:
425
+ - "video_url": str or None
426
+ - "success": bool
427
+
428
+ 3. create_quote_video_tool(video_url: str, quote_text: str, output_path: str, audio_path: str | None = None) -> dict
429
+ - Downloads a video, overlays the quote, and writes a video file to output_path.
430
+ - Returns a dict with at least:
431
+ - "success": bool
432
+ - "output_path": str | None
433
+
434
+ You MAY also have access to external MCP tools through your mcp_clients attribute; you can call them if helpful (e.g. logging, inspiration, etc.), but they are optional.
435
+
436
+ Your job:
437
+
438
+ 1. Call generate_quote_tool once with the given niche and style to obtain quote_text.
439
+ 2. For each variation i from 1 to {num_variations}, call search_pexels_video_tool(style, niche) to get a background video.
440
+ 3. For each successful search result, create an output path EXACTLY as:
441
+ "{base_prefix}{{i}}.mp4" where i is the variation index (1-based).
442
+ 4. Call create_quote_video_tool(video_url, quote_text, output_path) for each variation.
443
+ 5. Only keep variations where create_quote_video_tool returns success == True and a non-empty output_path.
444
+ 6. Build a human-readable status_log string summarizing what you did (which tools were called, success/failures).
445
+ 7. Return ONLY a valid JSON object of the form:
446
+
447
+ {{
448
+ "status_log": "multi-line human readable description of what you did",
449
+ "video_paths": [
450
+ "{base_prefix}1.mp4",
451
+ "... only include paths that actually succeeded ..."
452
+ ]
453
+ }}
454
+
455
+ CRITICAL:
456
+ - Do not wrap the JSON in markdown or backticks.
457
+ - Do not add extra keys.
458
+ - Do not print anything besides the JSON.
459
+ """
460
+ agent_result = agent.run(user_task)
461
 
462
+ try:
463
+ parsed = json.loads(agent_result)
464
+ except Exception as parse_err:
465
+ raise ValueError(f"Agent output was not valid JSON: {parse_err}\nRaw: {agent_result[:500]}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
466
 
467
+ status_log = parsed.get("status_log", "")
468
+ video_paths = parsed.get("video_paths", [])
 
469
 
470
+ # Keep only existing paths
471
+ valid_paths = [
472
+ p for p in video_paths
473
+ if isinstance(p, str) and os.path.exists(p)
474
+ ]
475
 
476
+ if not valid_paths:
477
+ raise ValueError("Agent returned no valid video paths or files do not exist on disk.")
 
 
 
 
 
 
478
 
479
+ # Copy to gallery directory
480
+ for idx, path in enumerate(valid_paths):
481
+ try:
482
+ filename = os.path.basename(path)
483
+ gallery_path = os.path.join(gallery_dir, f"gallery_{timestamp}_v{idx+1}_{filename}")
484
+ shutil.copy2(path, gallery_path)
485
+ except Exception as e:
486
+ print(f"⚠️ Failed to copy to gallery for {path}: {e}")
487
 
488
+ full_status = "\n".join(base_log + [status_log])
489
+ return full_status, valid_paths[:3]
490
 
491
  except Exception as e:
492
+ # Hard fallback if anything goes wrong
493
+ fallback_status, fallback_videos = fallback_pipeline(niche, style, num_variations)
494
+ combined_status = "\n".join(
495
+ base_log
496
+ + [f"⚠️ Agent pipeline error: {str(e)}", "", "πŸ”„ Switched to fallback pipeline:", fallback_status]
497
+ )
498
+ return combined_status, fallback_videos
499
+
500
 
501
  def fallback_pipeline(niche, style, num_variations=1):
502
+ """Fallback pipeline if MCP agent fails: direct tool calls."""
503
  status_log = []
504
  status_log.append("πŸ”„ **FALLBACK MODE (Direct Tool Execution)**\n")
505
 
 
510
  if "Error" in quote:
511
  return "\n".join(status_log) + f"\n❌ {quote}", []
512
 
513
+ status_log.append(" βœ… Quote generated\n")
514
 
515
  # Search videos
516
  status_log.append(f"πŸ” Searching for {num_variations} videos...")
 
528
  # Create videos
529
  status_log.append("🎬 Creating videos...")
530
  output_dir = "/tmp/quote_videos"
531
+ gallery_dir = "/data/gallery_videos"
532
  os.makedirs(output_dir, exist_ok=True)
533
  os.makedirs(gallery_dir, exist_ok=True)
534
 
 
535
  timestamp = int(time.time())
536
  created_videos = []
537
 
 
543
  video_result["video_url"],
544
  quote,
545
  output_path,
546
+ None
547
  )
548
 
549
  if creation_result["success"]:
550
  created_videos.append(creation_result["output_path"])
551
 
 
 
552
  gallery_filename = f"gallery_{timestamp}_v{i+1}.mp4"
553
  gallery_path = os.path.join(gallery_dir, gallery_filename)
554
  try:
555
  shutil.copy2(creation_result["output_path"], gallery_path)
556
+ except Exception as e:
557
+ print(f"⚠️ Gallery copy failed: {e}")
558
  else:
559
  error_msg = creation_result.get("message", "Unknown error")
560
  status_log.append(f" ❌ Video {i+1} error: {error_msg}")
 
567
 
568
  return "\n".join(status_log), created_videos
569
 
570
+ # -----------------------
571
+ # GRADIO UI
572
+ # -----------------------
573
+
574
  with gr.Blocks(title="AIQuoteClipGenerator - MCP + Gemini Edition", theme=gr.themes.Soft()) as demo:
575
  gr.Markdown("""
576
  # 🎬 AIQuoteClipGenerator
 
578
 
579
  **Key Features:**
580
  - 🌟 **Gemini AI:** No more repetitive quotes! Smart variety tracking
581
+ - πŸ”— **MCP Server Usage:** smolagents CodeAgent + MCP client
582
+ - πŸ› οΈ **4 Custom Tools:** Quote + Video search + Voice (optional) + Video creation
583
+ - πŸ€– **Agent Reasoning:** Autonomous task execution via CodeAgent.run
584
  - ⚑ **Modal Processing:** 4-8x faster video creation
585
  - 🎨 **Multiple Variations:** Get different video styles
 
586
  """)
587
 
588
+ # Example Gallery
589
  with gr.Accordion("πŸ“Έ Example Gallery - Recent Videos", open=True):
590
  gr.Markdown("See what others have created! Updates automatically after generation.")
591
 
 
592
  with gr.Row():
593
  gallery_video1 = gr.Video(label="", height=300, show_label=False, interactive=False)
594
  gallery_video2 = gr.Video(label="", height=300, show_label=False, interactive=False)
595
  gallery_video3 = gr.Video(label="", height=300, show_label=False, interactive=False)
596
 
 
597
  with gr.Row():
598
  gallery_video4 = gr.Video(label="", height=300, show_label=False, interactive=False)
599
  gallery_video5 = gr.Video(label="", height=300, show_label=False, interactive=False)
600
  gallery_video6 = gr.Video(label="", height=300, show_label=False, interactive=False)
601
 
 
602
  def load_gallery_videos():
603
  gallery_output_dir = "/data/gallery_videos"
604
  os.makedirs(gallery_output_dir, exist_ok=True)
605
 
606
  import glob
607
+ existing_videos = sorted(
608
+ glob.glob(f"{gallery_output_dir}/*.mp4"),
609
+ key=os.path.getmtime,
610
+ reverse=True
611
+ )[:6]
612
 
 
613
  videos = [None] * 6
614
  for i, video_path in enumerate(existing_videos):
615
  if i < 6:
 
678
  - 🌟 **Gemini AI** - Eliminates repetitive quotes with smart history tracking
679
  - 🎨 **Multiple Variations** - Get 1-3 different videos to choose from
680
  - ⚑ **Modal Processing** - 4-8x faster with serverless compute
681
+ - πŸ€– **Real Agent** - smolagents CodeAgent orchestrates tool calls
682
+ - πŸ”— **MCP Usage** - Agent wired with MCP client for external tools
683
 
684
  ### πŸ† Hackathon: MCP 1st Birthday
685
  **Track:** Track 2 - MCP in Action
686
  **Category:** Productivity Tools
687
  **Built with:** Gradio + smolagents + Gemini + OpenAI + Pexels + Modal + ElevenLabs + MCP
 
688
  """)
689
 
690
  def process_and_display(niche, style, num_variations):
691
+ status, videos = mcp_agent_pipeline(niche, style, int(num_variations))
692
 
 
693
  v1 = videos[0] if len(videos) > 0 else None
694
  v2 = videos[1] if len(videos) > 1 else None
695
  v3 = videos[2] if len(videos) > 2 else None
696
 
 
697
  gallery_vids = load_gallery_videos()
698
 
699
  return [status, v1, v2, v3] + gallery_vids
 
708
  ]
709
  )
710
 
 
711
  demo.load(
712
  load_gallery_videos,
713
  outputs=[
 
718
 
719
  if __name__ == "__main__":
720
  demo.launch(allowed_paths=["/data/gallery_videos"])