ladybug11 commited on
Commit
aa1565f
Β·
1 Parent(s): c3ca514
Files changed (1) hide show
  1. app.py +158 -363
app.py CHANGED
@@ -3,12 +3,15 @@ import os
3
  import requests
4
  import random
5
  import tempfile
 
 
 
 
6
  from openai import OpenAI
7
- from smolagents import CodeAgent, MCPClient, tool
8
- from huggingface_hub import InferenceClient
9
  from moviepy.editor import VideoFileClip, ImageClip, CompositeVideoClip, AudioFileClip
10
  from PIL import Image, ImageDraw, ImageFont
11
- import textwrap
12
  import numpy as np
13
  from elevenlabs import ElevenLabs, VoiceSettings
14
 
@@ -34,7 +37,10 @@ except Exception as e:
34
  print(f"MCP initialization warning: {e}")
35
  mcp_enabled = False
36
 
37
- # Define custom tools for the MCP agent
 
 
 
38
  @tool
39
  def generate_quote_tool(niche: str, style: str) -> str:
40
  """
@@ -48,7 +54,6 @@ def generate_quote_tool(niche: str, style: str) -> str:
48
  Returns:
49
  A powerful, unique quote string
50
  """
51
-
52
  try:
53
  result = hybrid_quote_generator.generate_quote(niche, style, prefer_gemini=True)
54
 
@@ -71,6 +76,7 @@ def generate_quote_tool(niche: str, style: str) -> str:
71
  except Exception as e:
72
  return f"Error generating quote: {str(e)}"
73
 
 
74
  @tool
75
  def search_pexels_video_tool(style: str, niche: str) -> dict:
76
  """
@@ -193,24 +199,15 @@ def search_pexels_video_tool(style: str, niche: str) -> dict:
193
  "error": str(e)
194
  }
195
 
 
196
  @tool
197
  def generate_voice_commentary_tool(quote_text: str, niche: str, output_path: str) -> dict:
198
  """
199
  Generate insightful voice commentary explaining the deeper meaning of the quote.
200
  Uses Gemini to create thoughtful explanation, then ElevenLabs to voice it.
201
  This adds VALUE - not just reading what's already on screen.
202
-
203
- Args:
204
- quote_text: The quote to explain
205
- niche: The niche/category for context
206
- output_path: Path where to save the audio file
207
-
208
- Returns:
209
- Dictionary with success status, output path, and the explanation text
210
  """
211
-
212
  try:
213
- # Step 1: Generate explanation using Gemini
214
  import google.generativeai as genai
215
 
216
  explanation_prompt = f"""Given this {niche} quote:
@@ -227,14 +224,9 @@ Requirements:
227
  - Make it thought-provoking
228
  - Don't start with "This quote..." - dive into the insight
229
 
230
- Example:
231
- Quote: "Between stimulus and response there is a space."
232
- Good: "In that pause lies your freedom. That's where you choose who you become, not who your habits make you."
233
-
234
  Return ONLY the commentary, nothing else."""
235
-
236
  genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
237
- model = genai.GenerativeModel('gemini-1.5-flash') # Updated model name
238
 
239
  response = model.generate_content(
240
  explanation_prompt,
@@ -247,20 +239,18 @@ Return ONLY the commentary, nothing else."""
247
  explanation = response.text.strip().strip('"').strip("'")
248
  print(f"πŸ“ Commentary: {explanation}")
249
 
250
- # Step 2: Generate voice using ElevenLabs
251
  audio = elevenlabs_client.text_to_speech.convert(
252
  text=explanation,
253
- voice_id="pNInz6obpgDQGcFmaJgB", # Adam - thoughtful and clear
254
  model_id="eleven_multilingual_v2",
255
  voice_settings=VoiceSettings(
256
- stability=0.6, # Stable for narration
257
  similarity_boost=0.8,
258
- style=0.6, # Expressive for commentary
259
  use_speaker_boost=True
260
  )
261
  )
262
 
263
- # Save audio
264
  with open(output_path, 'wb') as f:
265
  for chunk in audio:
266
  f.write(chunk)
@@ -280,34 +270,22 @@ Return ONLY the commentary, nothing else."""
280
  "message": f"Error creating commentary: {str(e)}"
281
  }
282
 
 
283
  @tool
284
  def create_quote_video_tool(video_url: str, quote_text: str, output_path: str, audio_path: str = None) -> dict:
285
  """
286
  Create a final quote video by overlaying text on the background video.
287
  Uses Modal for fast processing (4-8x faster) with local fallback.
288
  Optionally adds voice narration audio.
289
-
290
- Args:
291
- video_url: URL of the background video from Pexels
292
- quote_text: The quote text to overlay
293
- output_path: Path where to save the final video
294
- audio_path: Optional path to audio file for voice narration
295
-
296
- Returns:
297
- Dictionary with success status and output path
298
  """
299
-
300
- # Check if Modal is configured
301
  modal_endpoint = os.getenv("MODAL_ENDPOINT_URL")
302
 
303
  if modal_endpoint:
304
  try:
305
- import requests
306
  import base64
307
 
308
  print("πŸš€ Processing on Modal (fast!)...")
309
 
310
- # Prepare audio data if present
311
  audio_b64 = None
312
  if audio_path and os.path.exists(audio_path):
313
  with open(audio_path, 'rb') as f:
@@ -315,26 +293,23 @@ def create_quote_video_tool(video_url: str, quote_text: str, output_path: str, a
315
  audio_b64 = base64.b64encode(audio_bytes).decode()
316
  print(f" 🎀 Including voice commentary audio ({len(audio_bytes)} bytes)")
317
 
318
- # Call Modal endpoint with longer timeout
319
  response = requests.post(
320
  modal_endpoint,
321
  json={
322
  "video_url": video_url,
323
  "quote_text": quote_text,
324
- "audio_b64": audio_b64 # Pass audio as base64
325
  },
326
- timeout=120 # 2 minute timeout
327
  )
328
 
329
  if response.status_code == 200:
330
  result = response.json()
331
 
332
  if result.get("success"):
333
- # Decode video bytes
334
  video_b64 = result["video"]
335
  video_bytes = base64.b64decode(video_b64)
336
 
337
- # Save to output path
338
  with open(output_path, 'wb') as f:
339
  f.write(video_bytes)
340
 
@@ -350,7 +325,6 @@ def create_quote_video_tool(video_url: str, quote_text: str, output_path: str, a
350
  else:
351
  print(f"⚠️ Modal HTTP error: {response.status_code}")
352
 
353
- # If Modal failed, fall through to local processing
354
  print("⚠️ Modal failed, falling back to local processing...")
355
 
356
  except requests.Timeout:
@@ -360,190 +334,41 @@ def create_quote_video_tool(video_url: str, quote_text: str, output_path: str, a
360
  else:
361
  print("ℹ️ MODAL_ENDPOINT_URL not configured, using local processing")
362
 
363
- # LOCAL PROCESSING - Skip if taking too long
364
- print("πŸ”§ Processing locally (may be slow)...")
365
- print("⚠️ WARNING: Local processing can hang on HF Spaces!")
366
- print("⚠️ Consider setting up Modal for 4-8x faster processing")
367
-
368
- # Return error instead of hanging
369
  return {
370
  "success": False,
371
  "output_path": None,
372
  "message": "Local processing disabled - please configure Modal for video generation. Deploy Modal with: modal deploy modal_video_processing.py"
373
  }
374
-
375
- # LOCAL PROCESSING (Fallback or if Modal not configured)
376
- print("πŸ”§ Processing locally...")
377
-
378
- try:
379
- import time
380
- processing_start = time.time()
381
-
382
- # Step 1: Download the video
383
- response = requests.get(video_url, stream=True, timeout=30)
384
- response.raise_for_status()
385
-
386
- # Create temporary file for downloaded video
387
- temp_video = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4')
388
-
389
- with open(temp_video.name, 'wb') as f:
390
- for chunk in response.iter_content(chunk_size=8192):
391
- f.write(chunk)
392
-
393
- # Step 2: Load video with MoviePy
394
- video = VideoFileClip(temp_video.name)
395
-
396
- # Get video dimensions
397
- w, h = video.size
398
-
399
- # Step 3: Create text overlay using PIL
400
- def make_text_frame(t):
401
- """Generate a text frame using PIL"""
402
- # Create transparent image
403
- img = Image.new('RGBA', (w, h), (0, 0, 0, 0))
404
- draw = ImageDraw.Draw(img)
405
-
406
- # Calculate font size (2.5% of video height - smaller for better aesthetic)
407
- font_size = int(h * 0.025)
408
-
409
- # Try to load a font, fall back to default if needed
410
- try:
411
- # Try common fonts available on Linux
412
- font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf", font_size)
413
- except:
414
- try:
415
- font = ImageFont.truetype("/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf", font_size)
416
- except:
417
- # Fall back to default font
418
- font = ImageFont.load_default()
419
-
420
- # Wrap text to fit width (60% of video width for better proportions)
421
- max_width = int(w * 0.6)
422
-
423
- # Manual text wrapping with better line length
424
- words = quote_text.split()
425
- lines = []
426
- current_line = []
427
-
428
- for word in words:
429
- test_line = ' '.join(current_line + [word])
430
- # Get text bbox to check width
431
- bbox = draw.textbbox((0, 0), test_line, font=font)
432
- text_width = bbox[2] - bbox[0]
433
-
434
- if text_width <= max_width:
435
- current_line.append(word)
436
- else:
437
- if current_line:
438
- lines.append(' '.join(current_line))
439
- current_line = [word]
440
- else:
441
- lines.append(word)
442
-
443
- if current_line:
444
- lines.append(' '.join(current_line))
445
-
446
- # Calculate total text height with better line spacing
447
- line_spacing = int(font_size * 0.4)
448
- text_block_height = len(lines) * (font_size + line_spacing)
449
-
450
- # Start y position (centered vertically)
451
- y = (h - text_block_height) // 2
452
-
453
- # Draw each line centered
454
- for line in lines:
455
- # Get text size
456
- bbox = draw.textbbox((0, 0), line, font=font)
457
- text_width = bbox[2] - bbox[0]
458
-
459
- # Center horizontally
460
- x = (w - text_width) // 2
461
-
462
- # Draw black outline (stroke) - thinner for smaller text
463
- outline_width = max(2, int(font_size * 0.08))
464
- for adj_x in range(-outline_width, outline_width + 1):
465
- for adj_y in range(-outline_width, outline_width + 1):
466
- draw.text((x + adj_x, y + adj_y), line, font=font, fill='black')
467
-
468
- # Draw white text on top
469
- draw.text((x, y), line, font=font, fill='white')
470
-
471
- y += font_size + line_spacing
472
-
473
- return np.array(img)
474
-
475
- # Step 4: Create text clip from function
476
- text_clip = ImageClip(make_text_frame(0), duration=video.duration)
477
-
478
- # Step 5: Composite video with text
479
- final_video = CompositeVideoClip([video, text_clip])
480
-
481
- # Step 5.5: Add voice narration if provided
482
- if audio_path and os.path.exists(audio_path):
483
- try:
484
- print("🎀 Adding audio track...")
485
- audio_clip = AudioFileClip(audio_path)
486
- # Use the shorter duration between video and audio
487
- audio_duration = min(audio_clip.duration, final_video.duration)
488
- audio_clip = audio_clip.subclip(0, audio_duration)
489
- final_video = final_video.set_audio(audio_clip)
490
- print("βœ… Audio added successfully")
491
- except Exception as audio_error:
492
- print(f"⚠️ Could not add audio: {audio_error}")
493
- print("⚠️ Continuing without audio...")
494
- # Continue without audio rather than failing
495
-
496
- # Step 6: Export final video
497
- print("πŸ“¦ Exporting video (this may take 30-60s)...")
498
- final_video.write_videofile(
499
- output_path,
500
- codec='libx264',
501
- audio_codec='aac',
502
- temp_audiofile='temp-audio.m4a',
503
- remove_temp=True,
504
- fps=24,
505
- preset='ultrafast', # Faster encoding
506
- threads=4,
507
- logger=None, # Suppress verbose output
508
- verbose=False
509
- )
510
-
511
- print(f"βœ… Video export complete! ({time.time() - processing_start:.1f}s total)")
512
-
513
- # Cleanup
514
- video.close()
515
- final_video.close()
516
- os.unlink(temp_video.name)
517
-
518
- return {
519
- "success": True,
520
- "output_path": output_path,
521
- "message": "Video created successfully!"
522
- }
523
-
524
- except Exception as e:
525
- return {
526
- "success": False,
527
- "output_path": None,
528
- "message": f"Error creating video: {str(e)}"
529
- }
530
 
531
- # Initialize the MCP-powered agent
 
 
 
532
  def initialize_agent():
533
  """Initialize the CodeAgent with MCP capabilities"""
534
  try:
535
- # Use Hugging Face Inference API for the agent's LLM
536
- model = InferenceClient(token=os.getenv("HF_TOKEN"))
 
 
537
 
538
- # Create agent with custom tools
539
  agent = CodeAgent(
540
  tools=[generate_quote_tool, search_pexels_video_tool, generate_voice_commentary_tool, create_quote_video_tool],
541
  model=model,
542
- additional_authorized_imports=["requests", "openai", "random", "tempfile", "os", "google.generativeai"],
 
 
 
 
 
 
 
 
543
  max_steps=15
544
  )
545
 
546
- # Add MCP client if available
547
  if mcp_enabled:
548
  agent.mcp_clients = [mcp_client]
549
 
@@ -551,154 +376,130 @@ def initialize_agent():
551
  except Exception as e:
552
  return None, f"Agent initialization error: {str(e)}"
553
 
554
- # Initialize agent
555
  agent, agent_error = initialize_agent()
556
 
 
 
 
 
557
  def mcp_agent_pipeline(niche, style, num_variations=1):
558
  """
559
- MCP-POWERED AUTONOMOUS AGENT PIPELINE
560
- Uses smolagents with proper MCP server integration
561
- Generates multiple video variations with Gemini-powered quotes
 
 
 
562
  """
 
563
 
564
- status_log = []
565
- status_log.append("πŸ€– **MCP AGENT STARTING**\n")
566
-
567
- if agent_error:
568
- status_log.append(f"❌ Agent initialization failed: {agent_error}")
569
- status_log.append("\nπŸ”„ Falling back to direct tool execution...\n")
570
- return fallback_pipeline(niche, style, num_variations)
571
 
572
  try:
573
- # STEP 1: Agent receives task
574
- status_log.append("πŸ“‹ **TASK RECEIVED:**")
575
- status_log.append(f" β†’ Generate {niche} quote with {style} aesthetic")
576
- status_log.append(f" β†’ Create {num_variations} video variations")
577
- status_log.append("")
578
-
579
- # STEP 2: Agent executes quote generation with Gemini
580
- status_log.append("🧠 **GEMINI AI: generate_quote_tool**")
581
- quote = generate_quote_tool(niche, style)
582
-
583
- if "Error" in quote:
584
- return "\n".join(status_log) + f"\n❌ Failed: {quote}", []
585
-
586
- status_log.append(f" βœ… Generated: \"{quote[:100]}...\"" if len(quote) > 100 else f" βœ… Generated: \"{quote}\"\n")
587
-
588
- # STEP 3: Search for multiple videos
589
- status_log.append(f"πŸ” **MCP TOOL: search_pexels_video_tool (x{num_variations})**")
590
- status_log.append(f" ⏳ Finding {num_variations} different videos...")
591
-
592
- video_results = []
593
- for i in range(num_variations):
594
- video_result = search_pexels_video_tool(style, niche)
595
- if video_result["success"]:
596
- video_results.append(video_result)
597
- status_log.append(f" βœ… Video {i+1}: {video_result['search_query']}")
598
-
599
- if not video_results:
600
- return "\n".join(status_log) + "\n❌ No videos found", []
601
-
602
- status_log.append("")
603
-
604
- # STEP 4: Create multiple video variations
605
- status_log.append(f"🎬 **MCP TOOL: create_quote_video_tool (x{len(video_results)})**")
606
- status_log.append(f" ⏳ Creating {len(video_results)} video variations in parallel...")
607
-
608
  output_dir = "/tmp/quote_videos"
609
  gallery_dir = "/data/gallery_videos"
610
  os.makedirs(output_dir, exist_ok=True)
611
  os.makedirs(gallery_dir, exist_ok=True)
612
 
613
- import time
614
  timestamp = int(time.time())
 
615
 
616
- # Use threading for parallel Modal calls
617
- import threading
618
- import queue
619
-
620
- results_queue = queue.Queue()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
621
 
622
- def create_single_video(index, video_result):
623
- output_filename = f"quote_video_v{index+1}_{timestamp}.mp4"
624
- output_path = os.path.join(output_dir, output_filename)
625
-
626
- creation_result = create_quote_video_tool(
627
- video_result["video_url"],
628
- quote,
629
- output_path,
630
- None
631
- )
632
-
633
- results_queue.put((index, creation_result, output_path))
634
-
635
- # Start all threads
636
- threads = []
637
- for i, video_result in enumerate(video_results):
638
- thread = threading.Thread(target=create_single_video, args=(i, video_result))
639
- thread.start()
640
- threads.append(thread)
641
-
642
- # Wait for all to complete
643
- for thread in threads:
644
- thread.join()
645
-
646
- # Collect results
647
- created_videos = []
648
- all_results = []
649
- while not results_queue.empty():
650
- all_results.append(results_queue.get())
651
-
652
- # Sort by index
653
- all_results.sort(key=lambda x: x[0])
654
-
655
- # Process results
656
- for index, creation_result, output_path in all_results:
657
- if creation_result["success"]:
658
- created_videos.append(output_path)
659
- status_log.append(f" βœ… Variation {index+1} created!")
660
-
661
- # Copy to gallery
662
- import shutil
663
- gallery_filename = f"gallery_{timestamp}_v{index+1}.mp4"
664
- gallery_path = os.path.join(gallery_dir, gallery_filename)
665
- try:
666
- shutil.copy2(output_path, gallery_path)
667
- except:
668
- pass
669
- else:
670
- error_msg = creation_result.get("message", "Unknown error")
671
- status_log.append(f" ⚠️ Variation {i+1} failed: {error_msg}")
672
 
673
- if not created_videos:
674
- status_log.append("\n❌ All video creations failed")
675
- return "\n".join(status_log), []
676
 
677
- status_log.append("")
 
 
 
 
678
 
679
- # STEP 5: Integration status
680
- status_log.append("πŸ”— **AI INTEGRATIONS:**")
681
- status_log.append(" βœ… Gemini API - Quote generation with variety tracking")
682
- status_log.append(" βœ… Pexels API - Video search")
683
- status_log.append(" βœ… Modal Compute - Fast video processing")
684
- if mcp_enabled:
685
- status_log.append(" βœ… MCP Server - abidlabs-mcp-tools.hf.space")
686
- status_log.append("")
687
 
688
- # STEP 6: Success!
689
- status_log.append("✨ **PIPELINE COMPLETE!**")
690
- status_log.append(f" 🎬 Created {len(created_videos)} unique video variations")
691
- status_log.append(f" πŸ“₯ Choose your favorite and download!")
 
 
 
 
692
 
693
- final_status = "\n".join(status_log)
694
- return final_status, created_videos
695
 
696
  except Exception as e:
697
- status_log.append(f"\n❌ Pipeline error: {str(e)}")
698
- return "\n".join(status_log), []
 
 
 
 
 
 
699
 
700
  def fallback_pipeline(niche, style, num_variations=1):
701
- """Fallback pipeline if MCP agent fails"""
702
  status_log = []
703
  status_log.append("πŸ”„ **FALLBACK MODE (Direct Tool Execution)**\n")
704
 
@@ -709,7 +510,7 @@ def fallback_pipeline(niche, style, num_variations=1):
709
  if "Error" in quote:
710
  return "\n".join(status_log) + f"\n❌ {quote}", []
711
 
712
- status_log.append(f" βœ… Quote generated\n")
713
 
714
  # Search videos
715
  status_log.append(f"πŸ” Searching for {num_variations} videos...")
@@ -727,11 +528,10 @@ def fallback_pipeline(niche, style, num_variations=1):
727
  # Create videos
728
  status_log.append("🎬 Creating videos...")
729
  output_dir = "/tmp/quote_videos"
730
- gallery_dir = "/data/gallery_videos" # HF persistent storage
731
  os.makedirs(output_dir, exist_ok=True)
732
  os.makedirs(gallery_dir, exist_ok=True)
733
 
734
- import time
735
  timestamp = int(time.time())
736
  created_videos = []
737
 
@@ -743,20 +543,18 @@ def fallback_pipeline(niche, style, num_variations=1):
743
  video_result["video_url"],
744
  quote,
745
  output_path,
746
- None # No audio
747
  )
748
 
749
  if creation_result["success"]:
750
  created_videos.append(creation_result["output_path"])
751
 
752
- # Copy to gallery
753
- import shutil
754
  gallery_filename = f"gallery_{timestamp}_v{i+1}.mp4"
755
  gallery_path = os.path.join(gallery_dir, gallery_filename)
756
  try:
757
  shutil.copy2(creation_result["output_path"], gallery_path)
758
- except:
759
- pass
760
  else:
761
  error_msg = creation_result.get("message", "Unknown error")
762
  status_log.append(f" ❌ Video {i+1} error: {error_msg}")
@@ -769,7 +567,10 @@ def fallback_pipeline(niche, style, num_variations=1):
769
 
770
  return "\n".join(status_log), created_videos
771
 
772
- # Gradio Interface
 
 
 
773
  with gr.Blocks(title="AIQuoteClipGenerator - MCP + Gemini Edition", theme=gr.themes.Soft()) as demo:
774
  gr.Markdown("""
775
  # 🎬 AIQuoteClipGenerator
@@ -777,40 +578,38 @@ with gr.Blocks(title="AIQuoteClipGenerator - MCP + Gemini Edition", theme=gr.the
777
 
778
  **Key Features:**
779
  - 🌟 **Gemini AI:** No more repetitive quotes! Smart variety tracking
780
- - πŸ”— **MCP Server:** smolagents framework integration
781
- - πŸ› οΈ **4 Custom MCP Tools:** Quote + Video search + Video creation
782
- - πŸ€– **Agent Reasoning:** Autonomous task execution
783
  - ⚑ **Modal Processing:** 4-8x faster video creation
784
  - 🎨 **Multiple Variations:** Get different video styles
785
-
786
  """)
787
 
788
- # Example Gallery - Instagram-style grid
789
  with gr.Accordion("πŸ“Έ Example Gallery - Recent Videos", open=True):
790
  gr.Markdown("See what others have created! Updates automatically after generation.")
791
 
792
- # First row - 3 videos
793
  with gr.Row():
794
  gallery_video1 = gr.Video(label="", height=300, show_label=False, interactive=False)
795
  gallery_video2 = gr.Video(label="", height=300, show_label=False, interactive=False)
796
  gallery_video3 = gr.Video(label="", height=300, show_label=False, interactive=False)
797
 
798
- # Second row - 3 videos
799
  with gr.Row():
800
  gallery_video4 = gr.Video(label="", height=300, show_label=False, interactive=False)
801
  gallery_video5 = gr.Video(label="", height=300, show_label=False, interactive=False)
802
  gallery_video6 = gr.Video(label="", height=300, show_label=False, interactive=False)
803
 
804
- # Function to load gallery videos
805
  def load_gallery_videos():
806
  gallery_output_dir = "/data/gallery_videos"
807
  os.makedirs(gallery_output_dir, exist_ok=True)
808
 
809
  import glob
810
- existing_videos = sorted(glob.glob(f"{gallery_output_dir}/*.mp4"),
811
- key=os.path.getmtime, reverse=True)[:6]
 
 
 
812
 
813
- # Return 6 videos (None for empty slots)
814
  videos = [None] * 6
815
  for i, video_path in enumerate(existing_videos):
816
  if i < 6:
@@ -879,24 +678,22 @@ with gr.Blocks(title="AIQuoteClipGenerator - MCP + Gemini Edition", theme=gr.the
879
  - 🌟 **Gemini AI** - Eliminates repetitive quotes with smart history tracking
880
  - 🎨 **Multiple Variations** - Get 1-3 different videos to choose from
881
  - ⚑ **Modal Processing** - 4-8x faster with serverless compute
882
- - 🎯 **4 MCP Tools** - Quote (Gemini), Video Search, Voice, Video Creation
 
883
 
884
  ### πŸ† Hackathon: MCP 1st Birthday
885
  **Track:** Track 2 - MCP in Action
886
  **Category:** Productivity Tools
887
  **Built with:** Gradio + smolagents + Gemini + OpenAI + Pexels + Modal + ElevenLabs + MCP
888
-
889
  """)
890
 
891
  def process_and_display(niche, style, num_variations):
892
- status, videos = mcp_agent_pipeline(niche, style, num_variations)
893
 
894
- # Return up to 3 videos, None for unused slots
895
  v1 = videos[0] if len(videos) > 0 else None
896
  v2 = videos[1] if len(videos) > 1 else None
897
  v3 = videos[2] if len(videos) > 2 else None
898
 
899
- # Load updated gallery (6 videos)
900
  gallery_vids = load_gallery_videos()
901
 
902
  return [status, v1, v2, v3] + gallery_vids
@@ -911,7 +708,6 @@ with gr.Blocks(title="AIQuoteClipGenerator - MCP + Gemini Edition", theme=gr.the
911
  ]
912
  )
913
 
914
- # Load gallery on page load
915
  demo.load(
916
  load_gallery_videos,
917
  outputs=[
@@ -922,4 +718,3 @@ with gr.Blocks(title="AIQuoteClipGenerator - MCP + Gemini Edition", theme=gr.the
922
 
923
  if __name__ == "__main__":
924
  demo.launch(allowed_paths=["/data/gallery_videos"])
925
-
 
3
  import requests
4
  import random
5
  import tempfile
6
+ import json
7
+ import time
8
+ import shutil
9
+
10
  from openai import OpenAI
11
+ from smolagents import CodeAgent, MCPClient, tool, HfApiModel
12
+ from huggingface_hub import InferenceClient # still imported if you need it elsewhere
13
  from moviepy.editor import VideoFileClip, ImageClip, CompositeVideoClip, AudioFileClip
14
  from PIL import Image, ImageDraw, ImageFont
 
15
  import numpy as np
16
  from elevenlabs import ElevenLabs, VoiceSettings
17
 
 
37
  print(f"MCP initialization warning: {e}")
38
  mcp_enabled = False
39
 
40
+ # -----------------------
41
+ # TOOLS
42
+ # -----------------------
43
+
44
  @tool
45
  def generate_quote_tool(niche: str, style: str) -> str:
46
  """
 
54
  Returns:
55
  A powerful, unique quote string
56
  """
 
57
  try:
58
  result = hybrid_quote_generator.generate_quote(niche, style, prefer_gemini=True)
59
 
 
76
  except Exception as e:
77
  return f"Error generating quote: {str(e)}"
78
 
79
+
80
  @tool
81
  def search_pexels_video_tool(style: str, niche: str) -> dict:
82
  """
 
199
  "error": str(e)
200
  }
201
 
202
+
203
  @tool
204
  def generate_voice_commentary_tool(quote_text: str, niche: str, output_path: str) -> dict:
205
  """
206
  Generate insightful voice commentary explaining the deeper meaning of the quote.
207
  Uses Gemini to create thoughtful explanation, then ElevenLabs to voice it.
208
  This adds VALUE - not just reading what's already on screen.
 
 
 
 
 
 
 
 
209
  """
 
210
  try:
 
211
  import google.generativeai as genai
212
 
213
  explanation_prompt = f"""Given this {niche} quote:
 
224
  - Make it thought-provoking
225
  - Don't start with "This quote..." - dive into the insight
226
 
 
 
 
 
227
  Return ONLY the commentary, nothing else."""
 
228
  genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
229
+ model = genai.GenerativeModel('gemini-1.5-flash')
230
 
231
  response = model.generate_content(
232
  explanation_prompt,
 
239
  explanation = response.text.strip().strip('"').strip("'")
240
  print(f"πŸ“ Commentary: {explanation}")
241
 
 
242
  audio = elevenlabs_client.text_to_speech.convert(
243
  text=explanation,
244
+ voice_id="pNInz6obpgDQGcFmaJgB",
245
  model_id="eleven_multilingual_v2",
246
  voice_settings=VoiceSettings(
247
+ stability=0.6,
248
  similarity_boost=0.8,
249
+ style=0.6,
250
  use_speaker_boost=True
251
  )
252
  )
253
 
 
254
  with open(output_path, 'wb') as f:
255
  for chunk in audio:
256
  f.write(chunk)
 
270
  "message": f"Error creating commentary: {str(e)}"
271
  }
272
 
273
+
274
  @tool
275
  def create_quote_video_tool(video_url: str, quote_text: str, output_path: str, audio_path: str = None) -> dict:
276
  """
277
  Create a final quote video by overlaying text on the background video.
278
  Uses Modal for fast processing (4-8x faster) with local fallback.
279
  Optionally adds voice narration audio.
 
 
 
 
 
 
 
 
 
280
  """
 
 
281
  modal_endpoint = os.getenv("MODAL_ENDPOINT_URL")
282
 
283
  if modal_endpoint:
284
  try:
 
285
  import base64
286
 
287
  print("πŸš€ Processing on Modal (fast!)...")
288
 
 
289
  audio_b64 = None
290
  if audio_path and os.path.exists(audio_path):
291
  with open(audio_path, 'rb') as f:
 
293
  audio_b64 = base64.b64encode(audio_bytes).decode()
294
  print(f" 🎀 Including voice commentary audio ({len(audio_bytes)} bytes)")
295
 
 
296
  response = requests.post(
297
  modal_endpoint,
298
  json={
299
  "video_url": video_url,
300
  "quote_text": quote_text,
301
+ "audio_b64": audio_b64
302
  },
303
+ timeout=120
304
  )
305
 
306
  if response.status_code == 200:
307
  result = response.json()
308
 
309
  if result.get("success"):
 
310
  video_b64 = result["video"]
311
  video_bytes = base64.b64decode(video_b64)
312
 
 
313
  with open(output_path, 'wb') as f:
314
  f.write(video_bytes)
315
 
 
325
  else:
326
  print(f"⚠️ Modal HTTP error: {response.status_code}")
327
 
 
328
  print("⚠️ Modal failed, falling back to local processing...")
329
 
330
  except requests.Timeout:
 
334
  else:
335
  print("ℹ️ MODAL_ENDPOINT_URL not configured, using local processing")
336
 
337
+ # For hackathon deploy: avoid heavy local MoviePy on Spaces to prevent hangs
338
+ print("πŸ”§ Local processing disabled on this deployment.")
 
 
 
 
339
  return {
340
  "success": False,
341
  "output_path": None,
342
  "message": "Local processing disabled - please configure Modal for video generation. Deploy Modal with: modal deploy modal_video_processing.py"
343
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
344
 
345
+ # -----------------------
346
+ # AGENT INITIALIZATION
347
+ # -----------------------
348
+
349
  def initialize_agent():
350
  """Initialize the CodeAgent with MCP capabilities"""
351
  try:
352
+ # Use Hugging Face API model via HfApiModel
353
+ hf_token = os.getenv("HF_TOKEN")
354
+ model_id = os.getenv("HF_MODEL_ID", "meta-llama/Llama-3.3-70B-Instruct")
355
+ model = HfApiModel(model_id=model_id, token=hf_token)
356
 
 
357
  agent = CodeAgent(
358
  tools=[generate_quote_tool, search_pexels_video_tool, generate_voice_commentary_tool, create_quote_video_tool],
359
  model=model,
360
+ additional_authorized_imports=[
361
+ "requests",
362
+ "openai",
363
+ "random",
364
+ "tempfile",
365
+ "os",
366
+ "google.generativeai",
367
+ "json"
368
+ ],
369
  max_steps=15
370
  )
371
 
 
372
  if mcp_enabled:
373
  agent.mcp_clients = [mcp_client]
374
 
 
376
  except Exception as e:
377
  return None, f"Agent initialization error: {str(e)}"
378
 
 
379
  agent, agent_error = initialize_agent()
380
 
381
+ # -----------------------
382
+ # PIPELINES
383
+ # -----------------------
384
+
385
  def mcp_agent_pipeline(niche, style, num_variations=1):
386
  """
387
+ MAIN PIPELINE: uses smolagents CodeAgent.run to plan & call tools.
388
+ The agent:
389
+ - calls generate_quote_tool
390
+ - calls search_pexels_video_tool multiple times
391
+ - calls create_quote_video_tool for each variation
392
+ - returns JSON with status_log + video_paths
393
  """
394
+ base_log = ["πŸ€– **MCP AGENT RUN**"]
395
 
396
+ if agent_error or agent is None:
397
+ base_log.append(f"❌ Agent initialization failed: {agent_error}")
398
+ base_log.append("πŸ”„ Falling back to direct tool pipeline...")
399
+ status, vids = fallback_pipeline(niche, style, num_variations)
400
+ return "\n".join(base_log + [status]), vids
 
 
401
 
402
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
403
  output_dir = "/tmp/quote_videos"
404
  gallery_dir = "/data/gallery_videos"
405
  os.makedirs(output_dir, exist_ok=True)
406
  os.makedirs(gallery_dir, exist_ok=True)
407
 
 
408
  timestamp = int(time.time())
409
+ base_prefix = f"{output_dir}/agent_{timestamp}_v"
410
 
411
+ user_task = f"""
412
+ You are an autonomous Python agent helping creators generate short vertical quote videos.
413
+
414
+ Niche: {niche}
415
+ Style: {style}
416
+ Number of variations: {num_variations}
417
+
418
+ You have these TOOLS already available in this environment:
419
+
420
+ 1. generate_quote_tool(niche: str, style: str) -> str
421
+ - Returns a unique quote as plain text.
422
+
423
+ 2. search_pexels_video_tool(style: str, niche: str) -> dict
424
+ - Returns a dict with at least:
425
+ - "video_url": str or None
426
+ - "success": bool
427
+
428
+ 3. create_quote_video_tool(video_url: str, quote_text: str, output_path: str, audio_path: str | None = None) -> dict
429
+ - Downloads a video, overlays the quote, and writes a video file to output_path.
430
+ - Returns a dict with at least:
431
+ - "success": bool
432
+ - "output_path": str | None
433
+
434
+ You MAY also have access to external MCP tools through your mcp_clients attribute; you can call them if helpful (e.g. logging, inspiration, etc.), but they are optional.
435
+
436
+ Your job:
437
+
438
+ 1. Call generate_quote_tool once with the given niche and style to obtain quote_text.
439
+ 2. For each variation i from 1 to {num_variations}, call search_pexels_video_tool(style, niche) to get a background video.
440
+ 3. For each successful search result, create an output path EXACTLY as:
441
+ "{base_prefix}{{i}}.mp4" where i is the variation index (1-based).
442
+ 4. Call create_quote_video_tool(video_url, quote_text, output_path) for each variation.
443
+ 5. Only keep variations where create_quote_video_tool returns success == True and a non-empty output_path.
444
+ 6. Build a human-readable status_log string summarizing what you did (which tools were called, success/failures).
445
+ 7. Return ONLY a valid JSON object of the form:
446
+
447
+ {{
448
+ "status_log": "multi-line human readable description of what you did",
449
+ "video_paths": [
450
+ "{base_prefix}1.mp4",
451
+ "... only include paths that actually succeeded ..."
452
+ ]
453
+ }}
454
+
455
+ CRITICAL:
456
+ - Do not wrap the JSON in markdown or backticks.
457
+ - Do not add extra keys.
458
+ - Do not print anything besides the JSON.
459
+ """
460
+ agent_result = agent.run(user_task)
461
 
462
+ try:
463
+ parsed = json.loads(agent_result)
464
+ except Exception as parse_err:
465
+ raise ValueError(f"Agent output was not valid JSON: {parse_err}\nRaw: {agent_result[:500]}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
466
 
467
+ status_log = parsed.get("status_log", "")
468
+ video_paths = parsed.get("video_paths", [])
 
469
 
470
+ # Keep only existing paths
471
+ valid_paths = [
472
+ p for p in video_paths
473
+ if isinstance(p, str) and os.path.exists(p)
474
+ ]
475
 
476
+ if not valid_paths:
477
+ raise ValueError("Agent returned no valid video paths or files do not exist on disk.")
 
 
 
 
 
 
478
 
479
+ # Copy to gallery directory
480
+ for idx, path in enumerate(valid_paths):
481
+ try:
482
+ filename = os.path.basename(path)
483
+ gallery_path = os.path.join(gallery_dir, f"gallery_{timestamp}_v{idx+1}_{filename}")
484
+ shutil.copy2(path, gallery_path)
485
+ except Exception as e:
486
+ print(f"⚠️ Failed to copy to gallery for {path}: {e}")
487
 
488
+ full_status = "\n".join(base_log + [status_log])
489
+ return full_status, valid_paths[:3]
490
 
491
  except Exception as e:
492
+ # Hard fallback if anything goes wrong
493
+ fallback_status, fallback_videos = fallback_pipeline(niche, style, num_variations)
494
+ combined_status = "\n".join(
495
+ base_log
496
+ + [f"⚠️ Agent pipeline error: {str(e)}", "", "πŸ”„ Switched to fallback pipeline:", fallback_status]
497
+ )
498
+ return combined_status, fallback_videos
499
+
500
 
501
  def fallback_pipeline(niche, style, num_variations=1):
502
+ """Fallback pipeline if MCP agent fails: direct tool calls."""
503
  status_log = []
504
  status_log.append("πŸ”„ **FALLBACK MODE (Direct Tool Execution)**\n")
505
 
 
510
  if "Error" in quote:
511
  return "\n".join(status_log) + f"\n❌ {quote}", []
512
 
513
+ status_log.append(" βœ… Quote generated\n")
514
 
515
  # Search videos
516
  status_log.append(f"πŸ” Searching for {num_variations} videos...")
 
528
  # Create videos
529
  status_log.append("🎬 Creating videos...")
530
  output_dir = "/tmp/quote_videos"
531
+ gallery_dir = "/data/gallery_videos"
532
  os.makedirs(output_dir, exist_ok=True)
533
  os.makedirs(gallery_dir, exist_ok=True)
534
 
 
535
  timestamp = int(time.time())
536
  created_videos = []
537
 
 
543
  video_result["video_url"],
544
  quote,
545
  output_path,
546
+ None
547
  )
548
 
549
  if creation_result["success"]:
550
  created_videos.append(creation_result["output_path"])
551
 
 
 
552
  gallery_filename = f"gallery_{timestamp}_v{i+1}.mp4"
553
  gallery_path = os.path.join(gallery_dir, gallery_filename)
554
  try:
555
  shutil.copy2(creation_result["output_path"], gallery_path)
556
+ except Exception as e:
557
+ print(f"⚠️ Gallery copy failed: {e}")
558
  else:
559
  error_msg = creation_result.get("message", "Unknown error")
560
  status_log.append(f" ❌ Video {i+1} error: {error_msg}")
 
567
 
568
  return "\n".join(status_log), created_videos
569
 
570
+ # -----------------------
571
+ # GRADIO UI
572
+ # -----------------------
573
+
574
  with gr.Blocks(title="AIQuoteClipGenerator - MCP + Gemini Edition", theme=gr.themes.Soft()) as demo:
575
  gr.Markdown("""
576
  # 🎬 AIQuoteClipGenerator
 
578
 
579
  **Key Features:**
580
  - 🌟 **Gemini AI:** No more repetitive quotes! Smart variety tracking
581
+ - πŸ”— **MCP Server Usage:** smolagents CodeAgent + MCP client
582
+ - πŸ› οΈ **4 Custom Tools:** Quote + Video search + Voice (optional) + Video creation
583
+ - πŸ€– **Agent Reasoning:** Autonomous task execution via CodeAgent.run
584
  - ⚑ **Modal Processing:** 4-8x faster video creation
585
  - 🎨 **Multiple Variations:** Get different video styles
 
586
  """)
587
 
588
+ # Example Gallery
589
  with gr.Accordion("πŸ“Έ Example Gallery - Recent Videos", open=True):
590
  gr.Markdown("See what others have created! Updates automatically after generation.")
591
 
 
592
  with gr.Row():
593
  gallery_video1 = gr.Video(label="", height=300, show_label=False, interactive=False)
594
  gallery_video2 = gr.Video(label="", height=300, show_label=False, interactive=False)
595
  gallery_video3 = gr.Video(label="", height=300, show_label=False, interactive=False)
596
 
 
597
  with gr.Row():
598
  gallery_video4 = gr.Video(label="", height=300, show_label=False, interactive=False)
599
  gallery_video5 = gr.Video(label="", height=300, show_label=False, interactive=False)
600
  gallery_video6 = gr.Video(label="", height=300, show_label=False, interactive=False)
601
 
 
602
  def load_gallery_videos():
603
  gallery_output_dir = "/data/gallery_videos"
604
  os.makedirs(gallery_output_dir, exist_ok=True)
605
 
606
  import glob
607
+ existing_videos = sorted(
608
+ glob.glob(f"{gallery_output_dir}/*.mp4"),
609
+ key=os.path.getmtime,
610
+ reverse=True
611
+ )[:6]
612
 
 
613
  videos = [None] * 6
614
  for i, video_path in enumerate(existing_videos):
615
  if i < 6:
 
678
  - 🌟 **Gemini AI** - Eliminates repetitive quotes with smart history tracking
679
  - 🎨 **Multiple Variations** - Get 1-3 different videos to choose from
680
  - ⚑ **Modal Processing** - 4-8x faster with serverless compute
681
+ - πŸ€– **Real Agent** - smolagents CodeAgent orchestrates tool calls
682
+ - πŸ”— **MCP Usage** - Agent wired with MCP client for external tools
683
 
684
  ### πŸ† Hackathon: MCP 1st Birthday
685
  **Track:** Track 2 - MCP in Action
686
  **Category:** Productivity Tools
687
  **Built with:** Gradio + smolagents + Gemini + OpenAI + Pexels + Modal + ElevenLabs + MCP
 
688
  """)
689
 
690
  def process_and_display(niche, style, num_variations):
691
+ status, videos = mcp_agent_pipeline(niche, style, int(num_variations))
692
 
 
693
  v1 = videos[0] if len(videos) > 0 else None
694
  v2 = videos[1] if len(videos) > 1 else None
695
  v3 = videos[2] if len(videos) > 2 else None
696
 
 
697
  gallery_vids = load_gallery_videos()
698
 
699
  return [status, v1, v2, v3] + gallery_vids
 
708
  ]
709
  )
710
 
 
711
  demo.load(
712
  load_gallery_videos,
713
  outputs=[
 
718
 
719
  if __name__ == "__main__":
720
  demo.launch(allowed_paths=["/data/gallery_videos"])