update
Browse files
__pycache__/modal_video_processing.cpython-311.pyc
CHANGED
|
Binary files a/__pycache__/modal_video_processing.cpython-311.pyc and b/__pycache__/modal_video_processing.cpython-311.pyc differ
|
|
|
modal_video_processing.py
CHANGED
|
@@ -18,6 +18,7 @@ image = modal.Image.debian_slim(python_version="3.11").pip_install(
|
|
| 18 |
"fastapi"
|
| 19 |
)
|
| 20 |
|
|
|
|
| 21 |
@app.function(
|
| 22 |
image=image,
|
| 23 |
cpu=2,
|
|
@@ -27,9 +28,15 @@ image = modal.Image.debian_slim(python_version="3.11").pip_install(
|
|
| 27 |
allow_concurrent_inputs=10, # Process multiple in parallel
|
| 28 |
container_idle_timeout=120,
|
| 29 |
)
|
| 30 |
-
def process_quote_video(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
"""
|
| 32 |
-
Process quote video on Modal - FAST version
|
|
|
|
| 33 |
"""
|
| 34 |
import tempfile
|
| 35 |
import requests
|
|
@@ -37,141 +44,192 @@ def process_quote_video(video_url: str, quote_text: str, audio_b64: str = None)
|
|
| 37 |
from PIL import Image, ImageDraw, ImageFont
|
| 38 |
import numpy as np
|
| 39 |
import time
|
| 40 |
-
|
| 41 |
start_time = time.time()
|
| 42 |
-
|
| 43 |
# Download video
|
| 44 |
response = requests.get(video_url, stream=True, timeout=30)
|
| 45 |
response.raise_for_status()
|
| 46 |
-
|
| 47 |
-
temp_video = tempfile.NamedTemporaryFile(delete=False, suffix=
|
| 48 |
-
with open(temp_video.name,
|
| 49 |
-
for chunk in response.iter_content(chunk_size=1024*1024):
|
| 50 |
f.write(chunk)
|
| 51 |
-
|
| 52 |
# Load video
|
| 53 |
video = VideoFileClip(temp_video.name)
|
| 54 |
-
|
|
|
|
| 55 |
if video.duration > 10:
|
| 56 |
video = video.subclip(0, 10)
|
| 57 |
-
|
| 58 |
w, h = video.size
|
| 59 |
-
|
| 60 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
def make_text_frame(t):
|
| 62 |
-
img = Image.new(
|
| 63 |
draw = ImageDraw.Draw(img)
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
font = ImageFont.load_default()
|
| 71 |
-
|
| 72 |
-
max_width = int(w * 0.6)
|
| 73 |
-
|
| 74 |
# Wrap text
|
|
|
|
| 75 |
words = quote_text.split()
|
| 76 |
lines = []
|
| 77 |
current_line = []
|
| 78 |
-
|
| 79 |
for word in words:
|
| 80 |
-
test_line =
|
| 81 |
bbox = draw.textbbox((0, 0), test_line, font=font)
|
| 82 |
text_width = bbox[2] - bbox[0]
|
| 83 |
-
|
| 84 |
if text_width <= max_width:
|
| 85 |
current_line.append(word)
|
| 86 |
else:
|
| 87 |
if current_line:
|
| 88 |
-
lines.append(
|
| 89 |
current_line = [word]
|
| 90 |
else:
|
| 91 |
lines.append(word)
|
| 92 |
-
|
| 93 |
if current_line:
|
| 94 |
-
lines.append(
|
| 95 |
-
|
|
|
|
| 96 |
line_spacing = int(font_size * 0.4)
|
| 97 |
text_block_height = len(lines) * (font_size + line_spacing)
|
| 98 |
-
|
| 99 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
for line in lines:
|
| 101 |
bbox = draw.textbbox((0, 0), line, font=font)
|
| 102 |
text_width = bbox[2] - bbox[0]
|
| 103 |
x = (w - text_width) // 2
|
| 104 |
-
|
|
|
|
| 105 |
outline_width = max(2, int(font_size * 0.08))
|
| 106 |
for adj_x in range(-outline_width, outline_width + 1):
|
| 107 |
for adj_y in range(-outline_width, outline_width + 1):
|
| 108 |
-
draw.text((x + adj_x, y + adj_y), line, font=font, fill=
|
| 109 |
-
|
| 110 |
-
|
|
|
|
| 111 |
y += font_size + line_spacing
|
| 112 |
-
|
| 113 |
return np.array(img)
|
| 114 |
-
|
| 115 |
text_clip = ImageClip(make_text_frame(0), duration=video.duration)
|
| 116 |
-
|
| 117 |
# Composite
|
| 118 |
final_video = CompositeVideoClip([video, text_clip])
|
| 119 |
-
|
| 120 |
# Export - FAST settings
|
| 121 |
-
output_file = tempfile.NamedTemporaryFile(delete=False, suffix=
|
| 122 |
-
|
| 123 |
final_video.write_videofile(
|
| 124 |
output_file.name,
|
| 125 |
-
codec=
|
| 126 |
-
audio_codec=
|
| 127 |
fps=10,
|
| 128 |
-
preset=
|
| 129 |
threads=2,
|
| 130 |
verbose=False,
|
| 131 |
logger=None,
|
| 132 |
bitrate="400k",
|
| 133 |
-
ffmpeg_params=[
|
| 134 |
)
|
| 135 |
-
|
| 136 |
# Read bytes
|
| 137 |
-
with open(output_file.name,
|
| 138 |
video_bytes = f.read()
|
| 139 |
-
|
| 140 |
# Cleanup
|
| 141 |
video.close()
|
| 142 |
final_video.close()
|
| 143 |
os.unlink(temp_video.name)
|
| 144 |
os.unlink(output_file.name)
|
| 145 |
-
|
| 146 |
total_time = time.time() - start_time
|
| 147 |
-
print(f"🎉 Total: {total_time:.1f}s, Size: {len(video_bytes)/1024/1024:.2f}MB")
|
| 148 |
-
|
| 149 |
return video_bytes
|
| 150 |
|
| 151 |
|
| 152 |
@app.function(image=image)
|
| 153 |
@modal.web_endpoint(method="POST")
|
| 154 |
def process_video_endpoint(data: dict):
|
| 155 |
-
"""
|
| 156 |
video_url = data.get("video_url")
|
| 157 |
quote_text = data.get("quote_text")
|
| 158 |
-
audio_b64 = data.get("audio_b64")
|
| 159 |
-
|
|
|
|
| 160 |
if not video_url or not quote_text:
|
| 161 |
return {"error": "Missing video_url or quote_text"}, 400
|
| 162 |
-
|
| 163 |
try:
|
| 164 |
-
video_bytes = process_quote_video.remote(video_url, quote_text, audio_b64)
|
| 165 |
-
|
| 166 |
import base64
|
|
|
|
| 167 |
video_b64 = base64.b64encode(video_bytes).decode()
|
| 168 |
-
|
| 169 |
return {
|
| 170 |
"success": True,
|
| 171 |
"video": video_b64,
|
| 172 |
-
"size_mb": len(video_bytes) / 1024 / 1024
|
| 173 |
}
|
| 174 |
-
|
| 175 |
except Exception as e:
|
| 176 |
return {"error": str(e)}, 500
|
| 177 |
|
|
@@ -181,38 +239,46 @@ def process_video_endpoint(data: dict):
|
|
| 181 |
def process_batch_endpoint(data: dict):
|
| 182 |
"""
|
| 183 |
Batch endpoint - process multiple videos in PARALLEL.
|
| 184 |
-
Much faster for generating 2-3 variations!
|
| 185 |
"""
|
| 186 |
videos_data = data.get("videos", [])
|
| 187 |
-
|
| 188 |
if not videos_data:
|
| 189 |
return {"error": "Missing videos array"}, 400
|
| 190 |
-
|
| 191 |
try:
|
| 192 |
-
#
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 200 |
import base64
|
|
|
|
| 201 |
encoded_results = []
|
| 202 |
for video_bytes in results:
|
| 203 |
video_b64 = base64.b64encode(video_bytes).decode()
|
| 204 |
-
encoded_results.append(
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
|
|
|
|
|
|
| 210 |
return {
|
| 211 |
"success": True,
|
| 212 |
"videos": encoded_results,
|
| 213 |
-
"count": len(encoded_results)
|
| 214 |
}
|
| 215 |
-
|
| 216 |
except Exception as e:
|
| 217 |
-
return {"error": str(e)}, 500
|
| 218 |
-
|
|
|
|
| 18 |
"fastapi"
|
| 19 |
)
|
| 20 |
|
| 21 |
+
|
| 22 |
@app.function(
|
| 23 |
image=image,
|
| 24 |
cpu=2,
|
|
|
|
| 28 |
allow_concurrent_inputs=10, # Process multiple in parallel
|
| 29 |
container_idle_timeout=120,
|
| 30 |
)
|
| 31 |
+
def process_quote_video(
|
| 32 |
+
video_url: str,
|
| 33 |
+
quote_text: str,
|
| 34 |
+
audio_b64: str = None,
|
| 35 |
+
text_style: str = "classic_center"
|
| 36 |
+
) -> bytes:
|
| 37 |
"""
|
| 38 |
+
Process quote video on Modal - FAST version.
|
| 39 |
+
Supports multiple text styles / font layouts.
|
| 40 |
"""
|
| 41 |
import tempfile
|
| 42 |
import requests
|
|
|
|
| 44 |
from PIL import Image, ImageDraw, ImageFont
|
| 45 |
import numpy as np
|
| 46 |
import time
|
| 47 |
+
|
| 48 |
start_time = time.time()
|
| 49 |
+
|
| 50 |
# Download video
|
| 51 |
response = requests.get(video_url, stream=True, timeout=30)
|
| 52 |
response.raise_for_status()
|
| 53 |
+
|
| 54 |
+
temp_video = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
|
| 55 |
+
with open(temp_video.name, "wb") as f:
|
| 56 |
+
for chunk in response.iter_content(chunk_size=1024 * 1024):
|
| 57 |
f.write(chunk)
|
| 58 |
+
|
| 59 |
# Load video
|
| 60 |
video = VideoFileClip(temp_video.name)
|
| 61 |
+
|
| 62 |
+
# Optional: trim to first 10s to keep things snappy
|
| 63 |
if video.duration > 10:
|
| 64 |
video = video.subclip(0, 10)
|
| 65 |
+
|
| 66 |
w, h = video.size
|
| 67 |
+
|
| 68 |
+
# Choose layout + font behavior based on text_style
|
| 69 |
+
# Supported:
|
| 70 |
+
# - "classic_center" → centered, sans serif (default)
|
| 71 |
+
# - "lower_third_serif" → bottom, serif
|
| 72 |
+
# - "typewriter_top" → top, monospace vibe
|
| 73 |
def make_text_frame(t):
    """Render the quote as a transparent RGBA overlay sized like the video.

    Reads ``w``, ``h``, ``quote_text`` and ``text_style`` from the enclosing
    scope. ``t`` is accepted but never used: the same static frame is
    produced for every timestamp.

    Supported ``text_style`` values are ``"lower_third_serif"``,
    ``"typewriter_top"`` and ``"classic_center"``; any other value is
    treated exactly like ``"classic_center"`` (fonts, position AND wrap
    width).

    Returns:
        The rendered image converted with ``np.array``.
    """
    # style -> (font candidates in preference order, vertical mode,
    #           font size as a fraction of h, wrap width as a fraction of w)
    styles = {
        "lower_third_serif": (
            (
                "/usr/share/fonts/truetype/dejavu/DejaVuSerif-Bold.ttf",
                "/usr/share/fonts/truetype/dejavu/DejaVuSerif.ttf",
            ),
            "lower_third",
            0.032,
            0.7,
        ),
        "typewriter_top": (
            (
                "/usr/share/fonts/truetype/dejavu/DejaVuSansMono.ttf",
                "/usr/share/fonts/truetype/dejavu/DejaVuSansMono-Bold.ttf",
            ),
            "top",
            0.028,
            0.7,
        ),
        "classic_center": (
            (
                "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
                "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
            ),
            "center",
            0.03,
            0.6,
        ),
    }
    # Unknown styles resolve to the classic layout as a whole, so the wrap
    # width always matches the font/position that was actually selected.
    font_paths, y_mode, size_ratio, wrap_ratio = styles.get(
        text_style, styles["classic_center"]
    )
    font_size = int(h * size_ratio)

    img = Image.new("RGBA", (w, h), (0, 0, 0, 0))
    draw = ImageDraw.Draw(img)

    # Try fonts in order, fall back to the built-in default.
    font = None
    for path in font_paths:
        try:
            font = ImageFont.truetype(path, font_size)
            break
        except Exception:
            # Deliberately broad: any failure to load a candidate font is a
            # reason to try the next one, never a reason to fail the render.
            continue
    if font is None:
        font = ImageFont.load_default()

    def measure(text):
        """Return the horizontal extent of *text* in the selected font."""
        left, _, right, _ = draw.textbbox((0, 0), text, font=font)
        return right - left

    # Greedy word wrap: keep adding words while the line still fits.
    max_width = int(w * wrap_ratio)
    lines = []
    current_line = []
    for word in quote_text.split():
        if measure(" ".join(current_line + [word])) <= max_width:
            current_line.append(word)
        elif current_line:
            lines.append(" ".join(current_line))
            current_line = [word]
        else:
            # A single word wider than max_width gets a line of its own.
            lines.append(word)
    if current_line:
        lines.append(" ".join(current_line))

    # Line spacing
    line_spacing = int(font_size * 0.4)
    line_height = font_size + line_spacing
    text_block_height = len(lines) * line_height

    # Vertical placement
    if y_mode == "top":
        y = int(h * 0.10)
    elif y_mode == "lower_third":
        y = int(h * 0.65)
    else:  # center
        y = (h - text_block_height) // 2
    # Keep the block inside the frame: shift it up if it would run past the
    # bottom edge, and never start above the top edge.
    y = max(0, min(y, h - text_block_height))

    # Outline width depends only on the font size, so compute it once.
    outline_width = max(2, int(font_size * 0.08))
    offsets = range(-outline_width, outline_width + 1)

    # Draw lines
    for line in lines:
        x = (w - measure(line)) // 2

        # Outline / stroke: stamp the text in black at every offset.
        for adj_x in offsets:
            for adj_y in offsets:
                draw.text((x + adj_x, y + adj_y), line, font=font, fill="black")

        # Main text
        draw.text((x, y), line, font=font, fill="white")
        y += line_height

    return np.array(img)
|
| 171 |
+
|
| 172 |
text_clip = ImageClip(make_text_frame(0), duration=video.duration)
|
| 173 |
+
|
| 174 |
# Composite
|
| 175 |
final_video = CompositeVideoClip([video, text_clip])
|
| 176 |
+
|
| 177 |
# Export - FAST settings
|
| 178 |
+
output_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
|
|
|
|
| 179 |
final_video.write_videofile(
|
| 180 |
output_file.name,
|
| 181 |
+
codec="libx264",
|
| 182 |
+
audio_codec="aac",
|
| 183 |
fps=10,
|
| 184 |
+
preset="ultrafast",
|
| 185 |
threads=2,
|
| 186 |
verbose=False,
|
| 187 |
logger=None,
|
| 188 |
bitrate="400k",
|
| 189 |
+
ffmpeg_params=["-crf", "30", "-g", "30"],
|
| 190 |
)
|
| 191 |
+
|
| 192 |
# Read bytes
|
| 193 |
+
with open(output_file.name, "rb") as f:
|
| 194 |
video_bytes = f.read()
|
| 195 |
+
|
| 196 |
# Cleanup
|
| 197 |
video.close()
|
| 198 |
final_video.close()
|
| 199 |
os.unlink(temp_video.name)
|
| 200 |
os.unlink(output_file.name)
|
| 201 |
+
|
| 202 |
total_time = time.time() - start_time
|
| 203 |
+
print(f"🎉 Total: {total_time:.1f}s, Size: {len(video_bytes)/1024/1024:.2f}MB, Style: {text_style}")
|
| 204 |
+
|
| 205 |
return video_bytes
|
| 206 |
|
| 207 |
|
| 208 |
@app.function(image=image)
|
| 209 |
@modal.web_endpoint(method="POST")
|
| 210 |
def process_video_endpoint(data: dict):
    """Single video web endpoint.

    Reads ``video_url`` and ``quote_text`` (both required), plus the
    optional ``audio_b64`` and ``text_style`` (default
    ``"classic_center"``) from ``data``, runs ``process_quote_video``
    remotely and returns the rendered video base64-encoded.

    Returns:
        On success, a dict with ``success``, ``video`` (base64 text) and
        ``size_mb``. On failure, a ``(dict, status)`` tuple whose dict has
        an ``error`` message: status 400 when the payload is unusable,
        500 when processing raised.
    """
    # NOTE(review): whether the web framework turns the trailing integer of
    # the error tuples into the HTTP status code is not visible here - confirm.
    import base64

    # A body that is not a JSON object carries no fields at all; report it
    # like missing fields instead of failing with AttributeError.
    if not isinstance(data, dict):
        return {"error": "Missing video_url or quote_text"}, 400

    video_url = data.get("video_url")
    quote_text = data.get("quote_text")
    if not video_url or not quote_text:
        return {"error": "Missing video_url or quote_text"}, 400

    audio_b64 = data.get("audio_b64")  # passed through; noted upstream as "ignored for now"
    text_style = data.get("text_style", "classic_center")

    try:
        video_bytes = process_quote_video.remote(video_url, quote_text, audio_b64, text_style)
        return {
            "success": True,
            "video": base64.b64encode(video_bytes).decode(),
            "size_mb": len(video_bytes) / 1024 / 1024,
        }
    except Exception as e:
        # Top-level boundary: surface any processing failure to the caller.
        return {"error": str(e)}, 500
|
| 235 |
|
|
|
|
| 239 |
def process_batch_endpoint(data: dict):
    """Batch endpoint - process multiple videos in PARALLEL.

    ``data["videos"]`` must be a non-empty list of objects, each with
    ``video_url`` and ``quote_text`` and optionally ``audio_b64`` and
    ``text_style`` (default ``"classic_center"``). All entries are handed
    to ``process_quote_video.map`` in one call.

    Returns:
        On success, a dict with ``success``, ``videos`` (one dict per
        input with ``success``, base64 ``video`` and ``size_mb``) and
        ``count``. On failure, a ``(dict, status)`` tuple whose dict has an
        ``error`` message: status 400 for an unusable payload, 500 when
        processing raised.
    """
    # NOTE(review): whether the web framework turns the trailing integer of
    # the error tuples into the HTTP status code is not visible here - confirm.
    import base64

    videos_data = data.get("videos", []) if isinstance(data, dict) else None
    if not videos_data or not isinstance(videos_data, list):
        return {"error": "Missing videos array"}, 400

    # Validate every entry before starting any remote work, so a malformed
    # item is reported as a client error instead of an opaque KeyError.
    for index, entry in enumerate(videos_data):
        if (
            not isinstance(entry, dict)
            or not entry.get("video_url")
            or not entry.get("quote_text")
        ):
            return {"error": f"Video {index}: missing video_url or quote_text"}, 400

    try:
        # Extract per-video parameters as parallel sequences for map().
        results = process_quote_video.map(
            [v["video_url"] for v in videos_data],
            [v["quote_text"] for v in videos_data],
            [v.get("audio_b64") for v in videos_data],
            [v.get("text_style", "classic_center") for v in videos_data],
        )

        encoded_results = [
            {
                "success": True,
                "video": base64.b64encode(video_bytes).decode(),
                "size_mb": len(video_bytes) / 1024 / 1024,
            }
            for video_bytes in results
        ]

        return {
            "success": True,
            "videos": encoded_results,
            "count": len(encoded_results),
        }
    except Exception as e:
        # Top-level boundary: surface any processing failure to the caller.
        return {"error": str(e)}, 500
|
|
|