update

- app.py +144 -40
- modal_video_processing.py +41 -14
- quote_generator_gemini.py +2 -1
app.py
CHANGED
@@ -194,33 +194,73 @@ def search_pexels_video_tool(style: str, niche: str) -> dict:
     }

 @tool
-def generate_voice_narration_tool(quote_text: str, output_path: str) -> dict:
+def generate_voice_commentary_tool(quote_text: str, niche: str, output_path: str) -> dict:
     """
-    Generate voice
+    Generate insightful voice commentary explaining the deeper meaning of the quote.
+    Uses Gemini to create thoughtful explanation, then ElevenLabs to voice it.
+    This adds VALUE - not just reading what's already on screen.

     Args:
-        quote_text: The quote
+        quote_text: The quote to explain
+        niche: The niche/category for context
         output_path: Path where to save the audio file

     Returns:
-        Dictionary with success status and
+        Dictionary with success status, output path, and the explanation text
     """

     try:
-        # Generate
+        # Step 1: Generate explanation using Gemini
+        import google.generativeai as genai
+
+        explanation_prompt = f"""Given this {niche} quote:
+
+        "{quote_text}"
+
+        Write a brief, insightful voice-over commentary that explains the deeper meaning or practical wisdom.
+
+        Requirements:
+        - 2-3 sentences maximum
+        - Around 25-35 words total
+        - Spoken naturally (like a wise mentor)
+        - Add insight that isn't obvious from reading
+        - Make it thought-provoking
+        - Don't start with "This quote..." - dive into the insight
+
+        Example:
+        Quote: "Between stimulus and response there is a space."
+        Good: "In that pause lies your freedom. That's where you choose who you become, not who your habits make you."
+
+        Return ONLY the commentary, nothing else."""
+
+        genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
+        model = genai.GenerativeModel('gemini-pro')
+
+        response = model.generate_content(
+            explanation_prompt,
+            generation_config={
+                "temperature": 0.7,
+                "max_output_tokens": 100
+            }
+        )
+
+        explanation = response.text.strip().strip('"').strip("'")
+        print(f"💭 Commentary: {explanation}")
+
+        # Step 2: Generate voice using ElevenLabs
         audio = elevenlabs_client.text_to_speech.convert(
-            text=
-            voice_id="pNInz6obpgDQGcFmaJgB",  # Adam
+            text=explanation,
+            voice_id="pNInz6obpgDQGcFmaJgB",  # Adam - thoughtful and clear
             model_id="eleven_multilingual_v2",
             voice_settings=VoiceSettings(
-                stability=0.
-                similarity_boost=0.
-                style=0.
+                stability=0.6,  # Stable for narration
+                similarity_boost=0.8,
+                style=0.6,  # Expressive for commentary
                 use_speaker_boost=True
             )
         )

-        # Save audio
+        # Save audio
         with open(output_path, 'wb') as f:
            for chunk in audio:
                f.write(chunk)

@@ -228,14 +268,16 @@ def generate_voice_narration_tool(quote_text: str, output_path: str) -> dict:
         return {
             "success": True,
             "output_path": output_path,
-            "
+            "explanation": explanation,
+            "message": "Voice commentary created!"
         }

     except Exception as e:
         return {
             "success": False,
             "output_path": None,
-            "
+            "explanation": None,
+            "message": f"Error creating commentary: {str(e)}"
         }

 @tool

@@ -265,9 +307,13 @@ def create_quote_video_tool(video_url: str, quote_text: str, output_path: str, a

     print("🚀 Processing on Modal (fast!)...")

-    #
-
-
+    # Prepare audio data if present
+    audio_b64 = None
+    if audio_path and os.path.exists(audio_path):
+        with open(audio_path, 'rb') as f:
+            audio_bytes = f.read()
+        audio_b64 = base64.b64encode(audio_bytes).decode()
+        print(f"   🎤 Including voice commentary audio ({len(audio_bytes)} bytes)")

     # Call Modal endpoint
     response = requests.post(

@@ -275,9 +321,9 @@ def create_quote_video_tool(video_url: str, quote_text: str, output_path: str, a
         json={
             "video_url": video_url,
             "quote_text": quote_text,
-            "
+            "audio_b64": audio_b64  # Pass audio as base64
         },
-        timeout=300
+        timeout=300
     )

     if response.status_code == 200:

@@ -459,9 +505,9 @@ def initialize_agent():

     # Create agent with custom tools
     agent = CodeAgent(
-        tools=[generate_quote_tool, search_pexels_video_tool,
+        tools=[generate_quote_tool, search_pexels_video_tool, generate_voice_commentary_tool, create_quote_video_tool],
         model=model,
-        additional_authorized_imports=["requests", "openai", "random", "tempfile", "os"],
+        additional_authorized_imports=["requests", "openai", "random", "tempfile", "os", "google.generativeai"],
         max_steps=15
     )

@@ -476,16 +522,29 @@ def initialize_agent():
 # Initialize agent
 agent, agent_error = initialize_agent()

-def mcp_agent_pipeline(niche, style, num_variations=1):
+def mcp_agent_pipeline(niche, style, num_variations=1, add_voice=False):
     """
     MCP-POWERED AUTONOMOUS AGENT PIPELINE
     Uses smolagents with proper MCP server integration
     Generates multiple video variations with Gemini-powered quotes
+    Optionally adds ElevenLabs voice narration
     """

+    # Smart auto-enable voice for contemplative content
+    auto_voice_niches = ["Stoicism", "Mindfulness", "Leadership"]
+    if niche in auto_voice_niches and not add_voice:
+        add_voice = True
+        voice_reason = "🎤 Voice auto-enabled: Perfect for contemplative content!"
+    else:
+        voice_reason = None
+
     status_log = []
     status_log.append("🤖 **MCP AGENT STARTING**\n")

+    if voice_reason:
+        status_log.append(voice_reason)
+        status_log.append("")
+
     if agent_error:
         status_log.append(f"❌ Agent initialization failed: {agent_error}")
         status_log.append("\n🔄 Falling back to direct tool execution...\n")

@@ -523,9 +582,35 @@ def mcp_agent_pipeline(niche, style, num_variations=1):

     status_log.append("")

-    # STEP 4:
+    # STEP 4: Generate voice commentary if enabled
+    audio_path = None
+    explanation_text = None
+    if add_voice:
+        status_log.append("🎤 **GEMINI + ELEVENLABS: generate_voice_commentary_tool**")
+        status_log.append("   ⏳ Creating insightful commentary...")
+
+        import tempfile
+        audio_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp3')
+        audio_path = audio_file.name
+
+        voice_result = generate_voice_commentary_tool(quote, niche, audio_path)
+
+        if voice_result["success"]:
+            explanation_text = voice_result.get("explanation")
+            status_log.append("   ✅ Commentary created!")
+            status_log.append(f"   💡 Insight: \"{explanation_text}\"")
+            status_log.append(f"   🔗 Using Gemini (explanation) + ElevenLabs (voice)")
+        else:
+            status_log.append(f"   ⚠️ Commentary failed: {voice_result.get('message', 'Unknown error')}")
+            audio_path = None
+
+        status_log.append("")
+
+    # STEP 5: Create multiple video variations
     status_log.append(f"🎬 **MCP TOOL: create_quote_video_tool (x{len(video_results)})**")
     status_log.append(f"   ⏳ Creating {len(video_results)} video variations...")
+    if add_voice and audio_path:
+        status_log.append("   🎤 Including voice narration...")

     output_dir = "/tmp/quote_videos"
     gallery_dir = "/data/gallery_videos"  # HF persistent storage

@@ -544,7 +629,7 @@ def mcp_agent_pipeline(niche, style, num_variations=1):
             video_result["video_url"],
             quote,
             output_path,
-
+            audio_path  # Use voice if enabled
         )

         if creation_result["success"]:

@@ -569,10 +654,12 @@ def mcp_agent_pipeline(niche, style, num_variations=1):

     status_log.append("")

-    # STEP
+    # STEP 6: Integration status
     status_log.append("🔗 **AI INTEGRATIONS:**")
     status_log.append("   ✅ Gemini API - Quote generation with variety tracking")
     status_log.append("   ✅ Pexels API - Video search")
+    if add_voice:
+        status_log.append("   ✅ ElevenLabs - Premium AI voice synthesis")
     status_log.append("   ✅ Modal Compute - Fast video processing")
     if mcp_enabled:
         status_log.append("   ✅ MCP Server - abidlabs-mcp-tools.hf.space")

@@ -590,7 +677,7 @@ def mcp_agent_pipeline(niche, style, num_variations=1):
         status_log.append(f"\n❌ Pipeline error: {str(e)}")
         return "\n".join(status_log), []

-def fallback_pipeline(niche, style, num_variations=1):
+def fallback_pipeline(niche, style, num_variations=1, add_voice=False):
     """Fallback pipeline if MCP agent fails"""
     status_log = []
     status_log.append("🔄 **FALLBACK MODE (Direct Tool Execution)**\n")

@@ -604,6 +691,20 @@ def fallback_pipeline(niche, style, num_variations=1):

     status_log.append(f"   ✅ Quote generated\n")

+    # Generate voice commentary if enabled
+    audio_path = None
+    if add_voice:
+        status_log.append("🎤 Generating commentary...")
+        import tempfile
+        audio_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp3')
+        audio_path = audio_file.name
+        voice_result = generate_voice_commentary_tool(quote, niche, audio_path)
+        if voice_result["success"]:
+            status_log.append(f"   ✅ Commentary: {voice_result.get('explanation')}\n")
+        else:
+            audio_path = None
+            status_log.append("   ⚠️ Commentary failed\n")
+
     # Search videos
     status_log.append(f"🔍 Searching for {num_variations} videos...")
     video_results = []

@@ -636,7 +737,7 @@ def fallback_pipeline(niche, style, num_variations=1):
             video_result["video_url"],
             quote,
             output_path,
-
+            audio_path  # Use voice if enabled
         )

         if creation_result["success"]:

@@ -666,21 +767,23 @@ def fallback_pipeline(niche, style, num_variations=1):
 with gr.Blocks(title="AIQuoteClipGenerator - MCP + Gemini Edition", theme=gr.themes.Soft()) as demo:
     gr.Markdown("""
     # 🎬 AIQuoteClipGenerator
-    ### MCP-Powered with Gemini AI
+    ### MCP-Powered with Gemini AI + ElevenLabs Voice Commentary

     **Key Features:**
     - 🚀 **Gemini AI:** No more repetitive quotes! Smart variety tracking
+    - 🎤 **Voice Commentary:** AI explains the deeper meaning (not just reading the quote!)
+    - 🧠 **Dual Gemini Use:** Quote generation + Explanation generation
+    - 🔊 **ElevenLabs Voice:** Premium AI voice synthesis
     - 🔗 **MCP Server:** smolagents framework integration
-    - 🛠️ **4 Custom MCP Tools:** Quote
-    - 🤖 **Agent Reasoning:** Autonomous task execution
+    - 🛠️ **4 Custom MCP Tools:** Quote + Video search + Commentary + Video creation
     - ⚡ **Modal Processing:** 4-8x faster video creation
     - 🎨 **Multiple Variations:** Get different video styles

     **Prize Eligibility:**
-    - ✅ Gemini API Integration ($10K Creative category)
-    - ✅
+    - ✅ Gemini API Integration ($10K Creative category) - Used TWICE!
+    - ✅ ElevenLabs Voice Award (~$2K + AirPods 4 Pro) - Adds real value!
     - ✅ Modal Innovation Award ($2.5K)
-    - ✅
+    - ✅ OpenAI Fallback ($1K credits)
     """)

     # Example Gallery - Instagram-style grid

@@ -757,6 +860,12 @@ with gr.Blocks(title="AIQuoteClipGenerator - MCP + Gemini Edition", theme=gr.the
             info="Generate multiple versions to choose from"
         )

+        add_voice = gr.Checkbox(
+            label="🎤 Add Voice Commentary (Gemini + ElevenLabs)",
+            value=False,
+            info="AI explains the quote's deeper meaning with voice - adds real value! Auto-enabled for Stoicism/Mindfulness."
+        )
+
         generate_btn = gr.Button("🤖 Run MCP Agent with Gemini", variant="primary", size="lg")

     with gr.Column():

@@ -784,15 +893,10 @@ with gr.Blocks(title="AIQuoteClipGenerator - MCP + Gemini Edition", theme=gr.the
     **Category:** Productivity Tools
     **Built with:** Gradio + smolagents + Gemini + OpenAI + Pexels + Modal + ElevenLabs + MCP

-    **Prize Targets:**
-    - Google Gemini Creative Award ($10K)
-    - Modal Innovation Award ($2.5K)
-    - OpenAI API Integration ($1K credits)
-    - ElevenLabs Voice Award (~$2K + AirPods)
     """)

-    def process_and_display(niche, style, num_variations):
-        status, videos = mcp_agent_pipeline(niche, style, num_variations)
+    def process_and_display(niche, style, num_variations, add_voice):
+        status, videos = mcp_agent_pipeline(niche, style, num_variations, add_voice)

         # Return up to 3 videos, None for unused slots
         v1 = videos[0] if len(videos) > 0 else None

@@ -806,7 +910,7 @@ with gr.Blocks(title="AIQuoteClipGenerator - MCP + Gemini Edition", theme=gr.the

     generate_btn.click(
         process_and_display,
-        inputs=[niche, style, num_variations],
+        inputs=[niche, style, num_variations, add_voice],
         outputs=[
             output, video1, video2, video3,
             gallery_video1, gallery_video2, gallery_video3,
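Note on the new audio flow: the hunks above only show the request side of create_quote_video_tool. A minimal sketch of the full client round-trip is below; the Modal endpoint URL and the response field name ("video_b64") do not appear in this diff, so both are placeholders, not the app's actual values.

import base64
import os
import requests

MODAL_ENDPOINT = "https://<your-modal-app>-process-video-endpoint.modal.run"  # placeholder URL

def render_quote_clip(video_url, quote_text, output_path, audio_path=None):
    # Encode the local commentary mp3 as base64, mirroring create_quote_video_tool
    audio_b64 = None
    if audio_path and os.path.exists(audio_path):
        with open(audio_path, "rb") as f:
            audio_b64 = base64.b64encode(f.read()).decode()

    resp = requests.post(
        MODAL_ENDPOINT,
        json={"video_url": video_url, "quote_text": quote_text, "audio_b64": audio_b64},
        timeout=300,
    )
    resp.raise_for_status()

    # The endpoint returns the finished video base64-encoded; the key name here is an assumption
    video_b64 = resp.json()["video_b64"]
    with open(output_path, "wb") as f:
        f.write(base64.b64decode(video_b64))
    return output_path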
modal_video_processing.py
CHANGED
@@ -26,7 +26,7 @@ image = modal.Image.debian_slim(python_version="3.11").pip_install(
     keep_warm=1,  # Keep 1 container warm to eliminate cold starts!
     container_idle_timeout=300,  # Keep alive for 5 minutes
 )
-def process_quote_video(video_url: str, quote_text: str, audio_url: str = None) -> bytes:
+def process_quote_video(video_url: str, quote_text: str, audio_b64: str = None) -> bytes:
     """
     Process quote video on Modal's fast infrastructure.
     Downloads video, adds text overlay, optionally adds audio, returns video bytes.

@@ -34,7 +34,7 @@ def process_quote_video(video_url: str, quote_text: str, audio_url: str = None)
     Args:
         video_url: URL of background video
         quote_text: Quote to overlay
-
+        audio_b64: Optional base64-encoded audio file

     Returns:
         bytes: Processed video file as bytes

@@ -45,6 +45,7 @@ def process_quote_video(video_url: str, quote_text: str, audio_url: str = None)
     from PIL import Image, ImageDraw, ImageFont
     import numpy as np
     import time
+    import base64

     start_time = time.time()
     print(f"🎬 Starting video processing on Modal...")

@@ -146,11 +147,37 @@ def process_quote_video(video_url: str, quote_text: str, audio_url: str = None)
     final_video = CompositeVideoClip([video, text_clip])
     print(f"✅ Composited in {time.time() - composite_start:.1f}s")

-    #
-    if
-        print("
+    # Add audio if provided
+    if audio_b64:
+        print("🎤 Adding voice commentary audio...")
+        audio_start = time.time()
+        try:
+            # Decode base64 audio
+            audio_bytes = base64.b64decode(audio_b64)
+
+            # Save to temp file
+            temp_audio = tempfile.NamedTemporaryFile(delete=False, suffix='.mp3')
+            with open(temp_audio.name, 'wb') as f:
+                f.write(audio_bytes)
+
+            # Load audio clip
+            audio_clip = AudioFileClip(temp_audio.name)
+
+            # Use the shorter duration between video and audio
+            audio_duration = min(audio_clip.duration, final_video.duration)
+            audio_clip = audio_clip.subclip(0, audio_duration)
+
+            # Set audio on video
+            final_video = final_video.set_audio(audio_clip)
+
+            print(f"✅ Audio added in {time.time() - audio_start:.1f}s")
+
+            # Cleanup audio temp file
+            os.unlink(temp_audio.name)
+        except Exception as e:
+            print(f"⚠️ Audio failed: {e}, continuing without audio")

-    # Export with
+    # Export with optimized settings
     print("📦 Exporting video...")
     export_start = time.time()
     output_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4')

@@ -159,13 +186,13 @@ def process_quote_video(video_url: str, quote_text: str, audio_url: str = None)
         output_file.name,
         codec='libx264',
         audio_codec='aac',
-        fps=10,  #
+        fps=10,  # Lower fps for speed
         preset='ultrafast',
         threads=2,
         verbose=False,
         logger=None,
-        bitrate="400k",
-        ffmpeg_params=['-crf', '30', '-g', '30']
+        bitrate="400k",
+        ffmpeg_params=['-crf', '30', '-g', '30']
     )

     print(f"✅ Video exported in {time.time() - export_start:.1f}s")

@@ -192,18 +219,18 @@ def process_quote_video(video_url: str, quote_text: str, audio_url: str = None)
 @modal.web_endpoint(method="POST")
 def process_video_endpoint(data: dict):
     """
-    Web endpoint to process videos.
-    Accepts JSON with video_url, quote_text, and optional
+    Web endpoint to process videos with optional audio.
+    Accepts JSON with video_url, quote_text, and optional audio_b64.
     """
     video_url = data.get("video_url")
     quote_text = data.get("quote_text")
-
+    audio_b64 = data.get("audio_b64")  # Changed from audio_url

     if not video_url or not quote_text:
         return {"error": "Missing video_url or quote_text"}, 400

     try:
-        video_bytes = process_quote_video.remote(video_url, quote_text,
+        video_bytes = process_quote_video.remote(video_url, quote_text, audio_b64)

         # Return video bytes as base64
         import base64

@@ -225,6 +252,6 @@ if __name__ == "__main__":
     result = process_quote_video.remote(
         video_url="https://videos.pexels.com/video-files/3843433/3843433-uhd_2732_1440_25fps.mp4",
         quote_text="Test quote for local testing",
-
+        audio_b64=None
     )
     print(f"Got video: {len(result)} bytes")
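The new audio-muxing path can be sanity-checked locally without Modal. The sketch below mirrors the decode / AudioFileClip / subclip / set_audio sequence added above, assuming MoviePy 1.x (the API this diff uses); the input file names are hypothetical stand-ins for the downloaded Pexels clip and the ElevenLabs mp3.

import base64
import tempfile

from moviepy.editor import AudioFileClip, VideoFileClip

# Hypothetical local inputs standing in for the background video and commentary audio
video = VideoFileClip("sample_background.mp4")
with open("sample_commentary.mp3", "rb") as f:
    audio_b64 = base64.b64encode(f.read()).decode()

# Same sequence as the Modal function: decode, write to a temp file, trim to the shorter duration
audio_bytes = base64.b64decode(audio_b64)
temp_audio = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
temp_audio.write(audio_bytes)
temp_audio.close()

audio_clip = AudioFileClip(temp_audio.name)
audio_clip = audio_clip.subclip(0, min(audio_clip.duration, video.duration))

# Attach the commentary and export with the same codec settings as the diff
final = video.set_audio(audio_clip)
final.write_videofile("muxed_preview.mp4", codec="libx264", audio_codec="aac",
                      fps=10, preset="ultrafast")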
quote_generator_gemini.py
CHANGED
@@ -283,4 +283,5 @@ if __name__ == "__main__":

     except Exception as e:
         print(f"Error: {e}")
-        print("\nMake sure GEMINI_API_KEY is set in environment variables")
+        print("\nMake sure GEMINI_API_KEY is set in environment variables")
+