ladybug11 committed on
Commit
2f051ee
·
1 Parent(s): 6038041
Files changed (3) hide show
  1. app.py +80 -30
  2. modal_video_processing.py +16 -15
  3. quote_generator_gemini.py +1 -1
app.py CHANGED
@@ -3,7 +3,6 @@ import time
3
  import json
4
  import shutil
5
  import random
6
- import tempfile
7
  import requests
8
 
9
  import gradio as gr
@@ -147,7 +146,16 @@ def search_pexels_video_tool(style: str, niche: str) -> dict:
147
  queries = search_strategies.get(niche, {}).get(style, ["aesthetic nature"])
148
 
149
  try:
150
- headers = {"Authorization": PEXELS_API_KEY} if PEXELS_API_KEY else {}
 
 
 
 
 
 
 
 
 
151
  query = random.choice(queries)
152
 
153
  url = (
@@ -162,9 +170,7 @@ def search_pexels_video_tool(style: str, niche: str) -> dict:
162
  video_files = video.get("video_files", [])
163
 
164
  portrait_videos = [
165
- vf
166
- for vf in video_files
167
- if vf.get("width", 0) < vf.get("height", 0)
168
  ]
169
 
170
  if portrait_videos:
@@ -203,7 +209,12 @@ def search_pexels_video_tool(style: str, niche: str) -> dict:
203
 
204
 
205
  @tool
206
- def create_quote_video_tool(video_url: str, quote_text: str, output_path: str) -> dict:
 
 
 
 
 
207
  """
208
  Create a quote video by calling a Modal endpoint that overlays text on a background video.
209
 
@@ -211,6 +222,7 @@ def create_quote_video_tool(video_url: str, quote_text: str, output_path: str) -
211
  video_url: Direct URL of the background video (e.g. from Pexels).
212
  quote_text: The quote text to be overlaid on the video.
213
  output_path: Local file path where the resulting video should be saved.
 
214
 
215
  Returns:
216
  A dictionary with:
@@ -232,12 +244,13 @@ def create_quote_video_tool(video_url: str, quote_text: str, output_path: str) -
232
  }
233
 
234
  try:
235
- print("πŸš€ Processing on Modal (fast!)...")
236
  response = requests.post(
237
  modal_endpoint,
238
  json={
239
  "video_url": video_url,
240
  "quote_text": quote_text,
 
241
  },
242
  timeout=120,
243
  )
@@ -271,7 +284,7 @@ def create_quote_video_tool(video_url: str, quote_text: str, output_path: str) -
271
  return {
272
  "success": True,
273
  "output_path": output_path,
274
- "message": f"Video created via Modal (~{size_mb:.2f}MB).",
275
  }
276
 
277
  except Exception as e:
@@ -300,8 +313,8 @@ def initialize_agent():
300
  "time",
301
  "json",
302
  "random",
303
- "tempfile",
304
  "requests",
 
305
  ],
306
  max_steps=15,
307
  )
@@ -321,7 +334,12 @@ agent, agent_error = initialize_agent()
321
  # PIPELINES
322
  # -------------------------------------------------
323
 
324
- def mcp_agent_pipeline(niche: str, style: str, num_variations: int = 1):
 
 
 
 
 
325
  """
326
  MAIN PIPELINE: uses smolagents CodeAgent.run to plan & call tools.
327
 
@@ -336,7 +354,7 @@ def mcp_agent_pipeline(niche: str, style: str, num_variations: int = 1):
336
  if agent_error or agent is None:
337
  base_log.append(f"❌ Agent initialization failed: {agent_error}")
338
  base_log.append("πŸ”„ Falling back to direct tool pipeline...")
339
- status, vids = fallback_pipeline(niche, style, num_variations)
340
  return "\n".join(base_log + [status]), vids
341
 
342
  try:
@@ -352,20 +370,26 @@ def mcp_agent_pipeline(niche: str, style: str, num_variations: int = 1):
352
  You are an autonomous Python agent helping creators generate short vertical quote videos.
353
 
354
  Niche: {niche}
355
- Style: {style}
 
356
  Number of variations: {num_variations}
357
 
358
  You have these TOOLS available:
359
 
360
  1. generate_quote_tool(niche: str, style: str) -> str
361
- - Returns a single quote as plain text.
362
 
363
  2. search_pexels_video_tool(style: str, niche: str) -> dict
364
  - Returns a dict with:
365
  - "success": bool
366
  - "video_url": str or None
367
 
368
- 3. create_quote_video_tool(video_url: str, quote_text: str, output_path: str) -> dict
 
 
 
 
 
369
  - Writes a video file to output_path and returns a dict with:
370
  - "success": bool
371
  - "output_path": str or None
@@ -376,8 +400,8 @@ Your job:
376
  2. For each variation i from 1 to {num_variations}:
377
  - Call search_pexels_video_tool(style, niche).
378
  - If it succeeds, compute output_path exactly as:
379
- "{base_prefix}{{i}}.mp4"
380
- - Call create_quote_video_tool(video_url, quote_text, output_path).
381
  3. Collect only variations where create_quote_video_tool returns success == True and a non-empty output_path.
382
  4. Build a human-readable status_log string summarizing:
383
  - Which tools you called
@@ -433,15 +457,27 @@ CRITICAL:
433
  return full_status, valid_paths[:3]
434
 
435
  except Exception as e:
436
- fallback_status, fallback_videos = fallback_pipeline(niche, style, num_variations)
 
 
437
  combined_status = "\n".join(
438
  base_log
439
- + [f"⚠️ Agent pipeline error: {str(e)}", "", "πŸ”„ Switched to fallback pipeline:", fallback_status]
 
 
 
 
 
440
  )
441
  return combined_status, fallback_videos
442
 
443
 
444
- def fallback_pipeline(niche: str, style: str, num_variations: int = 1):
 
 
 
 
 
445
  """Fallback pipeline: direct tool calls without agent planning."""
446
  status_log = []
447
  status_log.append("πŸ”„ **FALLBACK MODE (Direct Tool Execution)**\n")
@@ -484,6 +520,7 @@ def fallback_pipeline(niche: str, style: str, num_variations: int = 1):
484
  video_url=vr["video_url"],
485
  quote_text=quote,
486
  output_path=output_path,
 
487
  )
488
 
489
  if creation_result.get("success"):
@@ -523,11 +560,11 @@ with gr.Blocks(
523
  ### MCP-Powered with Gemini AI
524
 
525
  **Key Features:**
526
- - 🌟 **Gemini AI** with quote-history to avoid repetition
527
- - πŸ€– **smolagents CodeAgent** for planning & tool-use
528
- - πŸ”— **MCP Client Ready** (uses external MCP tools if available)
529
- - πŸŽ₯ **Modal** for fast video rendering
530
- - 🎨 Generate multiple vertical quote video variations
531
  """
532
  )
533
 
@@ -590,6 +627,17 @@ with gr.Blocks(
590
  value="Cinematic",
591
  )
592
 
 
 
 
 
 
 
 
 
 
 
 
593
  num_variations = gr.Slider(
594
  minimum=1,
595
  maximum=3,
@@ -619,10 +667,10 @@ with gr.Blocks(
619
  """
620
  ---
621
  ### ✨ Features
622
- - 🌟 **Gemini-powered** quote variety (history-aware)
623
- - 🎨 Multiple aesthetic video variations
624
- - ⚑ **Modal**-accelerated rendering
625
- - πŸ€– **smolagents** CodeAgent for autonomous tool-calling
626
  - πŸ”— Optional MCP integration via MCPClient
627
 
628
  ### πŸ† Hackathon: MCP 1st Birthday
@@ -632,10 +680,11 @@ with gr.Blocks(
632
  """
633
  )
634
 
635
- def process_and_display(niche, style, num_variations):
636
  status, videos = mcp_agent_pipeline(
637
  niche=str(niche),
638
  style=str(style),
 
639
  num_variations=int(num_variations),
640
  )
641
 
@@ -649,7 +698,7 @@ with gr.Blocks(
649
 
650
  generate_btn.click(
651
  process_and_display,
652
- inputs=[niche, style, num_variations],
653
  outputs=[
654
  output,
655
  video1,
@@ -678,3 +727,4 @@ with gr.Blocks(
678
 
679
  if __name__ == "__main__":
680
  demo.launch(allowed_paths=["/data/gallery_videos"])
 
 
3
  import json
4
  import shutil
5
  import random
 
6
  import requests
7
 
8
  import gradio as gr
 
146
  queries = search_strategies.get(niche, {}).get(style, ["aesthetic nature"])
147
 
148
  try:
149
+ if not PEXELS_API_KEY:
150
+ return {
151
+ "success": False,
152
+ "video_url": None,
153
+ "search_query": "",
154
+ "pexels_url": None,
155
+ "error": "PEXELS_API_KEY not configured",
156
+ }
157
+
158
+ headers = {"Authorization": PEXELS_API_KEY}
159
  query = random.choice(queries)
160
 
161
  url = (
 
170
  video_files = video.get("video_files", [])
171
 
172
  portrait_videos = [
173
+ vf for vf in video_files if vf.get("width", 0) < vf.get("height", 0)
 
 
174
  ]
175
 
176
  if portrait_videos:
 
209
 
210
 
211
  @tool
212
+ def create_quote_video_tool(
213
+ video_url: str,
214
+ quote_text: str,
215
+ output_path: str,
216
+ text_style: str = "classic_center",
217
+ ) -> dict:
218
  """
219
  Create a quote video by calling a Modal endpoint that overlays text on a background video.
220
 
 
222
  video_url: Direct URL of the background video (e.g. from Pexels).
223
  quote_text: The quote text to be overlaid on the video.
224
  output_path: Local file path where the resulting video should be saved.
225
+ text_style: Visual text style/layout (e.g. 'classic_center', 'lower_third_serif', 'typewriter_top').
226
 
227
  Returns:
228
  A dictionary with:
 
244
  }
245
 
246
  try:
247
+ print(f"πŸš€ Processing on Modal (fast!) with text_style={text_style}...")
248
  response = requests.post(
249
  modal_endpoint,
250
  json={
251
  "video_url": video_url,
252
  "quote_text": quote_text,
253
+ "text_style": text_style,
254
  },
255
  timeout=120,
256
  )
 
284
  return {
285
  "success": True,
286
  "output_path": output_path,
287
+ "message": f"Video created via Modal (~{size_mb:.2f}MB, style={text_style}).",
288
  }
289
 
290
  except Exception as e:
 
313
  "time",
314
  "json",
315
  "random",
 
316
  "requests",
317
+ "shutil",
318
  ],
319
  max_steps=15,
320
  )
 
334
  # PIPELINES
335
  # -------------------------------------------------
336
 
337
+ def mcp_agent_pipeline(
338
+ niche: str,
339
+ style: str,
340
+ text_style: str = "classic_center",
341
+ num_variations: int = 1,
342
+ ):
343
  """
344
  MAIN PIPELINE: uses smolagents CodeAgent.run to plan & call tools.
345
 
 
354
  if agent_error or agent is None:
355
  base_log.append(f"❌ Agent initialization failed: {agent_error}")
356
  base_log.append("πŸ”„ Falling back to direct tool pipeline...")
357
+ status, vids = fallback_pipeline(niche, style, text_style, num_variations)
358
  return "\n".join(base_log + [status]), vids
359
 
360
  try:
 
370
  You are an autonomous Python agent helping creators generate short vertical quote videos.
371
 
372
  Niche: {niche}
373
+ Visual Style: {style}
374
+ Text style for quotes: {text_style}
375
  Number of variations: {num_variations}
376
 
377
  You have these TOOLS available:
378
 
379
  1. generate_quote_tool(niche: str, style: str) -> str
380
+ - Returns a single SHORT quote as plain text.
381
 
382
  2. search_pexels_video_tool(style: str, niche: str) -> dict
383
  - Returns a dict with:
384
  - "success": bool
385
  - "video_url": str or None
386
 
387
+ 3. create_quote_video_tool(
388
+ video_url: str,
389
+ quote_text: str,
390
+ output_path: str,
391
+ text_style: str = "classic_center"
392
+ ) -> dict
393
  - Writes a video file to output_path and returns a dict with:
394
  - "success": bool
395
  - "output_path": str or None
 
400
  2. For each variation i from 1 to {num_variations}:
401
  - Call search_pexels_video_tool(style, niche).
402
  - If it succeeds, compute output_path exactly as:
403
+ "{base_prefix}" + str(i) + ".mp4"
404
+ - Call create_quote_video_tool(video_url, quote_text, output_path, text_style="{text_style}").
405
  3. Collect only variations where create_quote_video_tool returns success == True and a non-empty output_path.
406
  4. Build a human-readable status_log string summarizing:
407
  - Which tools you called
 
457
  return full_status, valid_paths[:3]
458
 
459
  except Exception as e:
460
+ fallback_status, fallback_videos = fallback_pipeline(
461
+ niche, style, text_style, num_variations
462
+ )
463
  combined_status = "\n".join(
464
  base_log
465
+ + [
466
+ f"⚠️ Agent pipeline error: {str(e)}",
467
+ "",
468
+ "πŸ”„ Switched to fallback pipeline:",
469
+ fallback_status,
470
+ ]
471
  )
472
  return combined_status, fallback_videos
473
 
474
 
475
+ def fallback_pipeline(
476
+ niche: str,
477
+ style: str,
478
+ text_style: str = "classic_center",
479
+ num_variations: int = 1,
480
+ ):
481
  """Fallback pipeline: direct tool calls without agent planning."""
482
  status_log = []
483
  status_log.append("πŸ”„ **FALLBACK MODE (Direct Tool Execution)**\n")
 
520
  video_url=vr["video_url"],
521
  quote_text=quote,
522
  output_path=output_path,
523
+ text_style=text_style,
524
  )
525
 
526
  if creation_result.get("success"):
 
560
  ### MCP-Powered with Gemini AI
561
 
562
  **Key Features:**
563
+ - 🌟 Short, non-repeating Gemini quotes (per niche history)
564
+ - πŸ€– smolagents CodeAgent for tool planning
565
+ - πŸ”— Optional MCP client integration
566
+ - πŸŽ₯ Modal for fast video rendering
567
+ - πŸ…°οΈ Text style controls (font & placement)
568
  """
569
  )
570
 
 
627
  value="Cinematic",
628
  )
629
 
630
+ text_style = gr.Dropdown(
631
+ choices=[
632
+ "classic_center",
633
+ "lower_third_serif",
634
+ "typewriter_top",
635
+ ],
636
+ label="πŸ…°οΈ Text Style",
637
+ value="classic_center",
638
+ info="Change font & quote placement on the video",
639
+ )
640
+
641
  num_variations = gr.Slider(
642
  minimum=1,
643
  maximum=3,
 
667
  """
668
  ---
669
  ### ✨ Features
670
+ - 🌟 Gemini-powered, short non-repeating quotes (per niche)
671
+ - 🎨 Multiple aesthetic video & text layouts
672
+ - ⚑ Modal-accelerated rendering
673
+ - πŸ€– smolagents CodeAgent for autonomous tool-calling
674
  - πŸ”— Optional MCP integration via MCPClient
675
 
676
  ### πŸ† Hackathon: MCP 1st Birthday
 
680
  """
681
  )
682
 
683
+ def process_and_display(niche, style, text_style, num_variations):
684
  status, videos = mcp_agent_pipeline(
685
  niche=str(niche),
686
  style=str(style),
687
+ text_style=str(text_style),
688
  num_variations=int(num_variations),
689
  )
690
 
 
698
 
699
  generate_btn.click(
700
  process_and_display,
701
+ inputs=[niche, style, text_style, num_variations],
702
  outputs=[
703
  output,
704
  video1,
 
727
 
728
  if __name__ == "__main__":
729
  demo.launch(allowed_paths=["/data/gallery_videos"])
730
+
modal_video_processing.py CHANGED
@@ -15,7 +15,7 @@ image = modal.Image.debian_slim(python_version="3.11").pip_install(
15
  "imageio==2.31.1",
16
  "imageio-ffmpeg",
17
  "requests",
18
- "fastapi"
19
  )
20
 
21
 
@@ -24,15 +24,15 @@ image = modal.Image.debian_slim(python_version="3.11").pip_install(
24
  cpu=2,
25
  memory=2048,
26
  timeout=180,
27
- concurrency_limit=10, # Allow 10 videos at once
28
- allow_concurrent_inputs=10, # Process multiple in parallel
29
  container_idle_timeout=120,
30
  )
31
  def process_quote_video(
32
  video_url: str,
33
  quote_text: str,
34
  audio_b64: str = None,
35
- text_style: str = "classic_center"
36
  ) -> bytes:
37
  """
38
  Process quote video on Modal - FAST version.
@@ -59,7 +59,7 @@ def process_quote_video(
59
  # Load video
60
  video = VideoFileClip(temp_video.name)
61
 
62
- # Optional: trim to first 10s to keep things snappy
63
  if video.duration > 10:
64
  video = video.subclip(0, 10)
65
 
@@ -67,17 +67,15 @@ def process_quote_video(
67
 
68
  # Choose layout + font behavior based on text_style
69
  # Supported:
70
- # - "classic_center" → centered, sans serif (default)
71
- # - "lower_third_serif" → bottom, serif
72
- # - "typewriter_top" → top, monospace vibe
73
  def make_text_frame(t):
74
  img = Image.new("RGBA", (w, h), (0, 0, 0, 0))
75
  draw = ImageDraw.Draw(img)
76
 
77
- # Base font size
78
  base_font_size = int(h * 0.03)
79
 
80
- # Defaults
81
  font_paths = []
82
  y_mode = "center"
83
  font_size = base_font_size
@@ -96,8 +94,7 @@ def process_quote_video(
96
  ]
97
  y_mode = "top"
98
  font_size = int(h * 0.028)
99
- else:
100
- # classic_center
101
  font_paths = [
102
  "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
103
  "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
@@ -200,7 +197,9 @@ def process_quote_video(
200
  os.unlink(output_file.name)
201
 
202
  total_time = time.time() - start_time
203
- print(f"πŸŽ‰ Total: {total_time:.1f}s, Size: {len(video_bytes)/1024/1024:.2f}MB, Style: {text_style}")
 
 
204
 
205
  return video_bytes
206
 
@@ -218,7 +217,9 @@ def process_video_endpoint(data: dict):
218
  return {"error": "Missing video_url or quote_text"}, 400
219
 
220
  try:
221
- video_bytes = process_quote_video.remote(video_url, quote_text, audio_b64, text_style)
 
 
222
 
223
  import base64
224
 
@@ -281,4 +282,4 @@ def process_batch_endpoint(data: dict):
281
  }
282
 
283
  except Exception as e:
284
- return {"error": str(e)}, 500
 
15
  "imageio==2.31.1",
16
  "imageio-ffmpeg",
17
  "requests",
18
+ "fastapi",
19
  )
20
 
21
 
 
24
  cpu=2,
25
  memory=2048,
26
  timeout=180,
27
+ concurrency_limit=10,
28
+ allow_concurrent_inputs=10,
29
  container_idle_timeout=120,
30
  )
31
  def process_quote_video(
32
  video_url: str,
33
  quote_text: str,
34
  audio_b64: str = None,
35
+ text_style: str = "classic_center",
36
  ) -> bytes:
37
  """
38
  Process quote video on Modal - FAST version.
 
59
  # Load video
60
  video = VideoFileClip(temp_video.name)
61
 
62
+ # Trim to first 10 seconds
63
  if video.duration > 10:
64
  video = video.subclip(0, 10)
65
 
 
67
 
68
  # Choose layout + font behavior based on text_style
69
  # Supported:
70
+ # - "classic_center" → centered, sans serif (default)
71
+ # - "lower_third_serif" → bottom, serif
72
+ # - "typewriter_top" → top, monospace-ish
73
  def make_text_frame(t):
74
  img = Image.new("RGBA", (w, h), (0, 0, 0, 0))
75
  draw = ImageDraw.Draw(img)
76
 
 
77
  base_font_size = int(h * 0.03)
78
 
 
79
  font_paths = []
80
  y_mode = "center"
81
  font_size = base_font_size
 
94
  ]
95
  y_mode = "top"
96
  font_size = int(h * 0.028)
97
+ else: # classic_center
 
98
  font_paths = [
99
  "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
100
  "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
 
197
  os.unlink(output_file.name)
198
 
199
  total_time = time.time() - start_time
200
+ print(
201
+ f"πŸŽ‰ Total: {total_time:.1f}s, Size: {len(video_bytes)/1024/1024:.2f}MB, Style: {text_style}"
202
+ )
203
 
204
  return video_bytes
205
 
 
217
  return {"error": "Missing video_url or quote_text"}, 400
218
 
219
  try:
220
+ video_bytes = process_quote_video.remote(
221
+ video_url, quote_text, audio_b64, text_style
222
+ )
223
 
224
  import base64
225
 
 
282
  }
283
 
284
  except Exception as e:
285
+ return {"error": str(e)}, 500
quote_generator_gemini.py CHANGED
@@ -6,7 +6,7 @@ import google.generativeai as genai
6
  import os
7
  import json
8
  import time
9
- from typing import List, Optional
10
 
11
 
12
  class QuoteGenerator:
 
6
  import os
7
  import json
8
  import time
9
+ from typing import Optional
10
 
11
 
12
  class QuoteGenerator: