ladybug11 committed on
Commit
6038041
·
1 Parent(s): cdc4967
__pycache__/modal_video_processing.cpython-311.pyc CHANGED
Binary files a/__pycache__/modal_video_processing.cpython-311.pyc and b/__pycache__/modal_video_processing.cpython-311.pyc differ
 
modal_video_processing.py CHANGED
@@ -18,6 +18,7 @@ image = modal.Image.debian_slim(python_version="3.11").pip_install(
18
  "fastapi"
19
  )
20
 
 
21
  @app.function(
22
  image=image,
23
  cpu=2,
@@ -27,9 +28,15 @@ image = modal.Image.debian_slim(python_version="3.11").pip_install(
27
  allow_concurrent_inputs=10, # Process multiple in parallel
28
  container_idle_timeout=120,
29
  )
30
- def process_quote_video(video_url: str, quote_text: str, audio_b64: str = None) -> bytes:
 
 
 
 
 
31
  """
32
- Process quote video on Modal - FAST version (no audio).
 
33
  """
34
  import tempfile
35
  import requests
@@ -37,141 +44,192 @@ def process_quote_video(video_url: str, quote_text: str, audio_b64: str = None)
37
  from PIL import Image, ImageDraw, ImageFont
38
  import numpy as np
39
  import time
40
-
41
  start_time = time.time()
42
-
43
  # Download video
44
  response = requests.get(video_url, stream=True, timeout=30)
45
  response.raise_for_status()
46
-
47
- temp_video = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4')
48
- with open(temp_video.name, 'wb') as f:
49
- for chunk in response.iter_content(chunk_size=1024*1024):
50
  f.write(chunk)
51
-
52
  # Load video
53
  video = VideoFileClip(temp_video.name)
54
-
 
55
  if video.duration > 10:
56
  video = video.subclip(0, 10)
57
-
58
  w, h = video.size
59
-
60
- # Create text overlay
 
 
 
 
61
  def make_text_frame(t):
62
- img = Image.new('RGBA', (w, h), (0, 0, 0, 0))
63
  draw = ImageDraw.Draw(img)
64
-
65
- font_size = int(h * 0.025)
66
-
67
- try:
68
- font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf", font_size)
69
- except:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
  font = ImageFont.load_default()
71
-
72
- max_width = int(w * 0.6)
73
-
74
  # Wrap text
 
75
  words = quote_text.split()
76
  lines = []
77
  current_line = []
78
-
79
  for word in words:
80
- test_line = ' '.join(current_line + [word])
81
  bbox = draw.textbbox((0, 0), test_line, font=font)
82
  text_width = bbox[2] - bbox[0]
83
-
84
  if text_width <= max_width:
85
  current_line.append(word)
86
  else:
87
  if current_line:
88
- lines.append(' '.join(current_line))
89
  current_line = [word]
90
  else:
91
  lines.append(word)
92
-
93
  if current_line:
94
- lines.append(' '.join(current_line))
95
-
 
96
  line_spacing = int(font_size * 0.4)
97
  text_block_height = len(lines) * (font_size + line_spacing)
98
- y = (h - text_block_height) // 2
99
-
 
 
 
 
 
 
 
 
100
  for line in lines:
101
  bbox = draw.textbbox((0, 0), line, font=font)
102
  text_width = bbox[2] - bbox[0]
103
  x = (w - text_width) // 2
104
-
 
105
  outline_width = max(2, int(font_size * 0.08))
106
  for adj_x in range(-outline_width, outline_width + 1):
107
  for adj_y in range(-outline_width, outline_width + 1):
108
- draw.text((x + adj_x, y + adj_y), line, font=font, fill='black')
109
-
110
- draw.text((x, y), line, font=font, fill='white')
 
111
  y += font_size + line_spacing
112
-
113
  return np.array(img)
114
-
115
  text_clip = ImageClip(make_text_frame(0), duration=video.duration)
116
-
117
  # Composite
118
  final_video = CompositeVideoClip([video, text_clip])
119
-
120
  # Export - FAST settings
121
- output_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4')
122
-
123
  final_video.write_videofile(
124
  output_file.name,
125
- codec='libx264',
126
- audio_codec='aac',
127
  fps=10,
128
- preset='ultrafast',
129
  threads=2,
130
  verbose=False,
131
  logger=None,
132
  bitrate="400k",
133
- ffmpeg_params=['-crf', '30', '-g', '30']
134
  )
135
-
136
  # Read bytes
137
- with open(output_file.name, 'rb') as f:
138
  video_bytes = f.read()
139
-
140
  # Cleanup
141
  video.close()
142
  final_video.close()
143
  os.unlink(temp_video.name)
144
  os.unlink(output_file.name)
145
-
146
  total_time = time.time() - start_time
147
- print(f"🎉 Total: {total_time:.1f}s, Size: {len(video_bytes)/1024/1024:.2f}MB")
148
-
149
  return video_bytes
150
 
151
 
152
  @app.function(image=image)
153
  @modal.web_endpoint(method="POST")
154
  def process_video_endpoint(data: dict):
155
- """Web endpoint"""
156
  video_url = data.get("video_url")
157
  quote_text = data.get("quote_text")
158
- audio_b64 = data.get("audio_b64")
159
-
 
160
  if not video_url or not quote_text:
161
  return {"error": "Missing video_url or quote_text"}, 400
162
-
163
  try:
164
- video_bytes = process_quote_video.remote(video_url, quote_text, audio_b64)
165
-
166
  import base64
 
167
  video_b64 = base64.b64encode(video_bytes).decode()
168
-
169
  return {
170
  "success": True,
171
  "video": video_b64,
172
- "size_mb": len(video_bytes) / 1024 / 1024
173
  }
174
-
175
  except Exception as e:
176
  return {"error": str(e)}, 500
177
 
@@ -181,38 +239,46 @@ def process_video_endpoint(data: dict):
181
  def process_batch_endpoint(data: dict):
182
  """
183
  Batch endpoint - process multiple videos in PARALLEL.
184
- Much faster for generating 2-3 variations!
185
  """
186
  videos_data = data.get("videos", [])
187
-
188
  if not videos_data:
189
  return {"error": "Missing videos array"}, 400
190
-
191
  try:
192
- # Process all videos in parallel using .map()
193
- results = list(process_quote_video.map(
194
- [v["video_url"] for v in videos_data],
195
- [v["quote_text"] for v in videos_data],
196
- [v.get("audio_b64") for v in videos_data]
197
- ))
198
-
199
- # Encode all results
 
 
 
 
 
 
 
200
  import base64
 
201
  encoded_results = []
202
  for video_bytes in results:
203
  video_b64 = base64.b64encode(video_bytes).decode()
204
- encoded_results.append({
205
- "success": True,
206
- "video": video_b64,
207
- "size_mb": len(video_bytes) / 1024 / 1024
208
- })
209
-
 
 
210
  return {
211
  "success": True,
212
  "videos": encoded_results,
213
- "count": len(encoded_results)
214
  }
215
-
216
  except Exception as e:
217
- return {"error": str(e)}, 500
218
-
 
18
  "fastapi"
19
  )
20
 
21
+
22
  @app.function(
23
  image=image,
24
  cpu=2,
 
28
  allow_concurrent_inputs=10, # Process multiple in parallel
29
  container_idle_timeout=120,
30
  )
31
+ def process_quote_video(
32
+ video_url: str,
33
+ quote_text: str,
34
+ audio_b64: str = None,
35
+ text_style: str = "classic_center"
36
+ ) -> bytes:
37
  """
38
+ Process quote video on Modal - FAST version.
39
+ Supports multiple text styles / font layouts.
40
  """
41
  import tempfile
42
  import requests
 
44
  from PIL import Image, ImageDraw, ImageFont
45
  import numpy as np
46
  import time
47
+
48
  start_time = time.time()
49
+
50
  # Download video
51
  response = requests.get(video_url, stream=True, timeout=30)
52
  response.raise_for_status()
53
+
54
+ temp_video = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
55
+ with open(temp_video.name, "wb") as f:
56
+ for chunk in response.iter_content(chunk_size=1024 * 1024):
57
  f.write(chunk)
58
+
59
  # Load video
60
  video = VideoFileClip(temp_video.name)
61
+
62
+ # Optional: trim to first 10s to keep things snappy
63
  if video.duration > 10:
64
  video = video.subclip(0, 10)
65
+
66
  w, h = video.size
67
+
68
+ # Choose layout + font behavior based on text_style
69
+ # Supported:
70
+ # - "classic_center" → centered, sans serif (default)
71
+ # - "lower_third_serif" → bottom, serif
72
+ # - "typewriter_top" → top, monospace vibe
def make_text_frame(t):
    """Render the quote as a transparent RGBA overlay sized to the video frame.

    This is a closure: it reads ``w``, ``h``, ``quote_text`` and
    ``text_style`` from the enclosing function, and relies on the ``Image``,
    ``ImageDraw``, ``ImageFont`` and ``np`` names imported there.

    Args:
        t: Accepted but unused. The overlay is static; the caller renders it
            once and shows that single frame for the whole clip.

    Returns:
        The overlay as a NumPy array built from an RGBA image of size
        ``(w, h)``: white text with a black outline on a fully transparent
        background.

    Recognised ``text_style`` values are ``"classic_center"``,
    ``"lower_third_serif"`` and ``"typewriter_top"``. Any other value falls
    back to ``"classic_center"`` for *every* setting, including wrap width.
    """
    # One table per style so font, placement, size and wrap width can never
    # disagree (previously an unknown style got classic fonts and placement
    # but the wider 70% wrap).
    styles = {
        "classic_center": {
            "fonts": [
                "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
                "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
            ],
            "y_mode": "center",
            "size_ratio": 0.03,
            "width_ratio": 0.6,
        },
        "lower_third_serif": {
            "fonts": [
                "/usr/share/fonts/truetype/dejavu/DejaVuSerif-Bold.ttf",
                "/usr/share/fonts/truetype/dejavu/DejaVuSerif.ttf",
            ],
            "y_mode": "lower_third",
            "size_ratio": 0.032,
            "width_ratio": 0.7,
        },
        "typewriter_top": {
            "fonts": [
                "/usr/share/fonts/truetype/dejavu/DejaVuSansMono.ttf",
                "/usr/share/fonts/truetype/dejavu/DejaVuSansMono-Bold.ttf",
            ],
            "y_mode": "top",
            "size_ratio": 0.028,
            "width_ratio": 0.7,
        },
    }
    style = styles.get(text_style, styles["classic_center"])

    img = Image.new("RGBA", (w, h), (0, 0, 0, 0))
    draw = ImageDraw.Draw(img)

    # Font size scales with frame height; never let it collapse to zero on
    # very small frames.
    font_size = max(1, int(h * style["size_ratio"]))

    # Try each candidate font in order; fall back to Pillow's built-in font.
    # The broad catch is deliberate: font loading is best-effort and must not
    # abort the render.
    font = None
    for path in style["fonts"]:
        try:
            font = ImageFont.truetype(path, font_size)
        except Exception:
            continue
        break
    if font is None:
        font = ImageFont.load_default()

    # Greedy word wrap: keep adding words while the rendered line still fits.
    max_width = int(w * style["width_ratio"])
    lines = []
    current_line = []
    for word in quote_text.split():
        candidate = " ".join(current_line + [word])
        left, _, right, _ = draw.textbbox((0, 0), candidate, font=font)
        if right - left <= max_width:
            current_line.append(word)
        elif current_line:
            lines.append(" ".join(current_line))
            current_line = [word]
        else:
            # A single word wider than the limit gets its own line as-is.
            lines.append(word)
    if current_line:
        lines.append(" ".join(current_line))

    line_spacing = int(font_size * 0.4)
    line_advance = font_size + line_spacing
    # Height of the block itself: no spacing after the last line, so the
    # centred layout is not biased upward.
    text_block_height = max(0, len(lines) * line_advance - line_spacing)

    outline_width = max(2, int(font_size * 0.08))

    # Vertical anchor for the first line.
    if style["y_mode"] == "top":
        y = int(h * 0.10)
    elif style["y_mode"] == "lower_third":
        y = int(h * 0.65)
    else:
        y = (h - text_block_height) // 2

    # Keep the whole block inside the frame: a long quote anchored at 65% (or
    # 10%) would otherwise run off the bottom edge. If the block is taller
    # than the frame, pin it to the top so the quote's beginning is visible.
    y = max(0, min(y, h - text_block_height - outline_width))

    # Offsets for the outline, computed once rather than per line.
    outline_offsets = [
        (dx, dy)
        for dx in range(-outline_width, outline_width + 1)
        for dy in range(-outline_width, outline_width + 1)
    ]

    for line in lines:
        left, _, right, _ = draw.textbbox((0, 0), line, font=font)
        x = (w - (right - left)) // 2

        # Outline: stamp the line in black at every offset around (x, y)...
        for dx, dy in outline_offsets:
            draw.text((x + dx, y + dy), line, font=font, fill="black")
        # ...then the white text on top.
        draw.text((x, y), line, font=font, fill="white")

        y += line_advance

    return np.array(img)
171
+
172
  text_clip = ImageClip(make_text_frame(0), duration=video.duration)
173
+
174
  # Composite
175
  final_video = CompositeVideoClip([video, text_clip])
176
+
177
  # Export - FAST settings
178
+ output_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
 
179
  final_video.write_videofile(
180
  output_file.name,
181
+ codec="libx264",
182
+ audio_codec="aac",
183
  fps=10,
184
+ preset="ultrafast",
185
  threads=2,
186
  verbose=False,
187
  logger=None,
188
  bitrate="400k",
189
+ ffmpeg_params=["-crf", "30", "-g", "30"],
190
  )
191
+
192
  # Read bytes
193
+ with open(output_file.name, "rb") as f:
194
  video_bytes = f.read()
195
+
196
  # Cleanup
197
  video.close()
198
  final_video.close()
199
  os.unlink(temp_video.name)
200
  os.unlink(output_file.name)
201
+
202
  total_time = time.time() - start_time
203
+ print(f"🎉 Total: {total_time:.1f}s, Size: {len(video_bytes)/1024/1024:.2f}MB, Style: {text_style}")
204
+
205
  return video_bytes
206
 
207
 
208
  @app.function(image=image)
209
  @modal.web_endpoint(method="POST")
def process_video_endpoint(data: dict):
    """Single-video web endpoint: overlay a quote on one video.

    Args:
        data: Request payload. Keys read here:
            ``video_url`` (required), ``quote_text`` (required),
            ``audio_b64`` (optional; forwarded but currently ignored),
            ``text_style`` (optional, defaults to ``"classic_center"``).

    Returns:
        On success, a dict with ``success``, ``video`` (base64-encoded MP4
        bytes) and ``size_mb``.
        On failure, a ``(body, status)`` tuple: status 400 when a required
        field is missing or blank, 500 when processing raised.
    """
    import base64

    def _blank(value):
        # Required fields must be real, non-whitespace strings. A blank quote
        # would otherwise cost a full render that produces no overlay at all.
        return not isinstance(value, str) or not value.strip()

    video_url = data.get("video_url")
    quote_text = data.get("quote_text")
    audio_b64 = data.get("audio_b64")  # ignored for now
    text_style = data.get("text_style", "classic_center")

    # NOTE(review): errors are returned as a (body, status) tuple; confirm the
    # web framework turns that into an HTTP status rather than serializing it
    # as a two-element JSON array with status 200.
    if _blank(video_url) or _blank(quote_text):
        return {"error": "Missing video_url or quote_text"}, 400

    try:
        video_bytes = process_quote_video.remote(
            video_url.strip(), quote_text, audio_b64, text_style
        )
        return {
            "success": True,
            "video": base64.b64encode(video_bytes).decode(),
            "size_mb": len(video_bytes) / 1024 / 1024,
        }
    except Exception as e:
        # Endpoint boundary: report the failure to the caller instead of
        # letting it propagate.
        return {"error": str(e)}, 500
235
 
 
def process_batch_endpoint(data: dict):
    """Batch web endpoint: overlay quotes on several videos in parallel.

    Args:
        data: Request payload with a ``videos`` array. Each entry is a dict
            with ``video_url`` and ``quote_text`` (both required) plus
            optional ``audio_b64`` and ``text_style`` (defaults to
            ``"classic_center"``).

    Returns:
        On success, a dict with ``success``, ``videos`` (one
        ``{"success", "video", "size_mb"}`` dict per input, ``video`` being
        base64-encoded MP4 bytes) and ``count``.
        On failure, a ``(body, status)`` tuple: status 400 for a missing or
        malformed request, 500 when processing raised.
    """
    import base64

    def _blank(value):
        # Required fields must be real, non-whitespace strings.
        return not isinstance(value, str) or not value.strip()

    videos_data = data.get("videos", [])

    # NOTE(review): errors are returned as a (body, status) tuple; confirm the
    # web framework turns that into an HTTP status rather than serializing it
    # as a two-element JSON array with status 200.
    if not videos_data:
        return {"error": "Missing videos array"}, 400
    if not isinstance(videos_data, list):
        return {"error": "videos must be an array"}, 400

    # Validate every entry up front so a malformed item is reported as a
    # client error naming the offending index, not as an opaque 500 raised
    # from a KeyError/TypeError part-way through.
    for index, entry in enumerate(videos_data):
        if (
            not isinstance(entry, dict)
            or _blank(entry.get("video_url"))
            or _blank(entry.get("quote_text"))
        ):
            return {"error": f"videos[{index}]: Missing video_url or quote_text"}, 400

    try:
        # Per-video parameters, passed as parallel sequences to .map().
        video_urls = [v["video_url"].strip() for v in videos_data]
        quote_texts = [v["quote_text"] for v in videos_data]
        audio_b64s = [v.get("audio_b64") for v in videos_data]
        text_styles = [v.get("text_style", "classic_center") for v in videos_data]

        encoded_results = [
            {
                "success": True,
                "video": base64.b64encode(video_bytes).decode(),
                "size_mb": len(video_bytes) / 1024 / 1024,
            }
            for video_bytes in process_quote_video.map(
                video_urls,
                quote_texts,
                audio_b64s,
                text_styles,
            )
        ]

        return {
            "success": True,
            "videos": encoded_results,
            "count": len(encoded_results),
        }
    except Exception as e:
        # Endpoint boundary: report the failure to the caller instead of
        # letting it propagate.
        return {"error": str(e)}, 500