File size: 8,223 Bytes
59e4f9e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2f051ee
59e4f9e
 
6038041
59e4f9e
 
fb7805c
 
 
2f051ee
 
f081221
59e4f9e
6038041
 
 
 
2f051ee
6038041
59e4f9e
6038041
 
59e4f9e
 
 
4908797
59e4f9e
 
fb7805c
6038041
fb7805c
6038041
4908797
59e4f9e
 
6038041
 
 
 
59e4f9e
6038041
59e4f9e
 
6038041
2f051ee
fb7805c
 
6038041
59e4f9e
6038041
 
 
2f051ee
 
 
59e4f9e
6038041
59e4f9e
6038041
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2f051ee
6038041
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4908797
6038041
59e4f9e
6038041
59e4f9e
 
 
6038041
59e4f9e
6038041
59e4f9e
 
6038041
59e4f9e
 
 
 
6038041
59e4f9e
 
 
6038041
59e4f9e
6038041
 
 
59e4f9e
 
6038041
 
 
 
 
 
 
 
 
 
59e4f9e
 
 
 
6038041
 
59e4f9e
 
 
6038041
 
 
 
59e4f9e
6038041
59e4f9e
6038041
59e4f9e
6038041
59e4f9e
 
6038041
4908797
6038041
59e4f9e
 
6038041
 
4908797
6038041
fb7805c
59e4f9e
1475643
66d5109
6038041
59e4f9e
6038041
4908797
6038041
59e4f9e
6038041
59e4f9e
 
 
 
 
6038041
fb7805c
2f051ee
 
 
6038041
59e4f9e
 
 
 
 
 
6038041
59e4f9e
 
6038041
 
 
59e4f9e
 
6038041
59e4f9e
2f051ee
 
 
6038041
59e4f9e
6038041
59e4f9e
6038041
59e4f9e
 
 
6038041
59e4f9e
6038041
59e4f9e
 
c1b7466
 
 
 
 
 
 
 
 
6038041
c1b7466
 
6038041
c1b7466
6038041
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c1b7466
6038041
c1b7466
 
 
6038041
 
 
 
 
 
 
 
c1b7466
 
 
6038041
c1b7466
6038041
c1b7466
2f051ee
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
# modal_video_processing.py
# Deploy with: modal deploy modal_video_processing.py

import modal
import os

# Modal application object; the functions and web endpoints below register on it
app = modal.App("aiquoteclipgenerator")

# Python packages installed into the container image (pinned where a version is given)
_PIP_PACKAGES = (
    "moviepy==1.0.3",
    "pillow",
    "numpy",
    "imageio==2.31.1",
    "imageio-ffmpeg",
    "requests",
    "fastapi",
)

# Container image: Debian slim with Python 3.11 plus the packages listed above
image = modal.Image.debian_slim(python_version="3.11").pip_install(*_PIP_PACKAGES)


@app.function(
    image=image,
    cpu=2,
    memory=2048,
    timeout=180,
    concurrency_limit=10,
    allow_concurrent_inputs=10,
    container_idle_timeout=120,
)
def process_quote_video(
    video_url: str,
    quote_text: str,
    audio_b64: str = None,
    text_style: str = "classic_center",
) -> bytes:
    """
    Overlay a quote on a downloaded video and return the encoded MP4 bytes.

    The source video is downloaded to a temporary file, trimmed to its first
    10 seconds, composited with a single pre-rendered text overlay, and
    re-encoded with fast, low-bitrate H.264 settings.

    Args:
        video_url: URL of the source video; fetched with a 30 second timeout.
        quote_text: Text to draw on the video. It is word-wrapped to a
            fraction of the frame width that depends on ``text_style``.
        audio_b64: Accepted for interface compatibility; currently unused.
        text_style: One of ``"classic_center"`` (centered, sans serif),
            ``"lower_third_serif"`` (lower part of the frame, serif) or
            ``"typewriter_top"`` (top of the frame, monospace). Any other
            value falls back to ``"classic_center"``.

    Returns:
        The encoded MP4 file contents.

    Raises:
        requests.HTTPError: If the download returns an error status.
        Exception: Any error raised while decoding or encoding the video.
            Temporary files and clips are released before it propagates.
    """
    import tempfile
    import requests
    from moviepy.editor import VideoFileClip, ImageClip, CompositeVideoClip
    from PIL import Image, ImageDraw, ImageFont
    import numpy as np
    import time

    start_time = time.time()

    temp_paths = []  # every temp file created here; removed in the finally block
    video = None
    final_video = None

    try:
        # Reserve a path for the download; close the handle right away because
        # the file is re-opened by name below.
        temp_video = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
        temp_video.close()
        temp_paths.append(temp_video.name)

        # Download video (streamed in 1 MiB chunks; connection released on exit)
        with requests.get(video_url, stream=True, timeout=30) as response:
            response.raise_for_status()
            with open(temp_video.name, "wb") as f:
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    f.write(chunk)

        # Load video
        video = VideoFileClip(temp_video.name)

        # Trim to first 10 seconds
        if video.duration > 10:
            video = video.subclip(0, 10)

        w, h = video.size

        # Choose layout + font behavior based on text_style
        # Supported:
        # - "classic_center"      -> centered, sans serif (default)
        # - "lower_third_serif"   -> bottom, serif
        # - "typewriter_top"      -> top, monospace-ish
        def make_text_frame(t):
            # `t` is unused: the overlay is static and rendered only once.
            img = Image.new("RGBA", (w, h), (0, 0, 0, 0))
            draw = ImageDraw.Draw(img)

            if text_style == "lower_third_serif":
                font_paths = [
                    "/usr/share/fonts/truetype/dejavu/DejaVuSerif-Bold.ttf",
                    "/usr/share/fonts/truetype/dejavu/DejaVuSerif.ttf",
                ]
                y_mode = "lower_third"
                font_size = int(h * 0.032)
            elif text_style == "typewriter_top":
                font_paths = [
                    "/usr/share/fonts/truetype/dejavu/DejaVuSansMono.ttf",
                    "/usr/share/fonts/truetype/dejavu/DejaVuSansMono-Bold.ttf",
                ]
                y_mode = "top"
                font_size = int(h * 0.028)
            else:  # classic_center
                font_paths = [
                    "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
                    "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
                ]
                y_mode = "center"
                font_size = int(h * 0.03)

            # Try fonts, fall back to PIL's default when none can be loaded
            font = None
            for path in font_paths:
                try:
                    font = ImageFont.truetype(path, font_size)
                    break
                except Exception:
                    continue
            if font is None:
                font = ImageFont.load_default()

            # Greedy word wrap: keep adding words while the line still fits
            max_width = int(w * 0.7 if text_style != "classic_center" else w * 0.6)
            lines = []
            current_line = []

            for word in quote_text.split():
                test_line = " ".join(current_line + [word])
                bbox = draw.textbbox((0, 0), test_line, font=font)
                text_width = bbox[2] - bbox[0]

                if text_width <= max_width:
                    current_line.append(word)
                elif current_line:
                    lines.append(" ".join(current_line))
                    current_line = [word]
                else:
                    # A single word wider than max_width gets its own line
                    lines.append(word)

            if current_line:
                lines.append(" ".join(current_line))

            # Line spacing
            line_spacing = int(font_size * 0.4)
            text_block_height = len(lines) * (font_size + line_spacing)

            # Vertical placement
            if y_mode == "top":
                y = int(h * 0.10)
            elif y_mode == "lower_third":
                y = int(h * 0.65)
            else:  # center
                y = (h - text_block_height) // 2

            # Outline thickness is the same for every line
            outline_width = max(2, int(font_size * 0.08))

            # Draw lines, each horizontally centered
            for line in lines:
                bbox = draw.textbbox((0, 0), line, font=font)
                text_width = bbox[2] - bbox[0]
                x = (w - text_width) // 2

                # Outline / stroke: stamp the text in black at every offset
                for adj_x in range(-outline_width, outline_width + 1):
                    for adj_y in range(-outline_width, outline_width + 1):
                        draw.text(
                            (x + adj_x, y + adj_y), line, font=font, fill="black"
                        )

                # Main text
                draw.text((x, y), line, font=font, fill="white")
                y += font_size + line_spacing

            return np.array(img)

        text_clip = ImageClip(make_text_frame(0), duration=video.duration)

        # Composite
        final_video = CompositeVideoClip([video, text_clip])

        # Export - FAST settings
        output_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
        output_file.close()  # ffmpeg writes to the path; the handle is not needed
        temp_paths.append(output_file.name)

        final_video.write_videofile(
            output_file.name,
            codec="libx264",
            audio_codec="aac",
            fps=10,
            preset="ultrafast",
            threads=2,
            verbose=False,
            logger=None,
            bitrate="400k",
            ffmpeg_params=["-crf", "30", "-g", "30"],
        )

        # Read bytes
        with open(output_file.name, "rb") as f:
            video_bytes = f.read()
    finally:
        # Cleanup runs on success and failure alike. Clips are closed first so
        # the files they hold open can be removed.
        for clip in (video, final_video):
            if clip is not None:
                try:
                    clip.close()
                except Exception:
                    # Best effort: a close failure must not mask the original
                    # error or prevent temp-file removal.
                    pass
        for path in temp_paths:
            try:
                os.unlink(path)
            except OSError:
                pass

    total_time = time.time() - start_time
    print(
        f"πŸŽ‰ Total: {total_time:.1f}s, Size: {len(video_bytes)/1024/1024:.2f}MB, Style: {text_style}"
    )

    return video_bytes


@app.function(image=image)
@modal.web_endpoint(method="POST")
def process_video_endpoint(data: dict):
    """
    Single video web endpoint.

    Expects a JSON object with ``video_url`` and ``quote_text`` (both
    required) and optional ``audio_b64`` and ``text_style`` (defaults to
    ``"classic_center"``).

    Returns:
        On success, a dict with ``success``, the base64-encoded MP4 under
        ``video`` and its size in megabytes under ``size_mb``.
        On failure, a JSON response ``{"error": ...}`` with HTTP status 400
        (missing input) or 500 (processing failed).
    """
    import base64

    # The endpoint is served through FastAPI, where a `(body, status)` tuple
    # is serialized as a JSON array with status 200. An explicit response
    # object is needed to set the HTTP status code.
    from fastapi.responses import JSONResponse

    video_url = data.get("video_url")
    quote_text = data.get("quote_text")
    audio_b64 = data.get("audio_b64")  # ignored for now
    text_style = data.get("text_style", "classic_center")

    if not video_url or not quote_text:
        return JSONResponse(
            status_code=400,
            content={"error": "Missing video_url or quote_text"},
        )

    try:
        video_bytes = process_quote_video.remote(
            video_url, quote_text, audio_b64, text_style
        )
    except Exception as e:
        # Top-level boundary: report any processing failure to the client.
        return JSONResponse(status_code=500, content={"error": str(e)})

    video_b64 = base64.b64encode(video_bytes).decode()

    return {
        "success": True,
        "video": video_b64,
        "size_mb": len(video_bytes) / 1024 / 1024,
    }


@app.function(image=image)
@modal.web_endpoint(method="POST")
def process_batch_endpoint(data: dict):
    """
    Batch endpoint - process multiple videos in PARALLEL.

    Expects a JSON object with a non-empty ``videos`` array. Each item is an
    object with ``video_url`` and ``quote_text`` (both required and
    non-empty) and optional ``audio_b64`` and ``text_style`` (defaults to
    ``"classic_center"``).

    Returns:
        On success, a dict with ``success``, a ``videos`` list (one entry per
        input, each holding the base64-encoded MP4 and its size in megabytes)
        and ``count``.
        On failure, a JSON response ``{"error": ...}`` with HTTP status 400
        (invalid input) or 500 (processing failed). A failure in any single
        video fails the whole batch.
    """
    import base64

    # The endpoint is served through FastAPI, where a `(body, status)` tuple
    # is serialized as a JSON array with status 200. An explicit response
    # object is needed to set the HTTP status code.
    from fastapi.responses import JSONResponse

    videos_data = data.get("videos", [])

    if not videos_data or not isinstance(videos_data, list):
        return JSONResponse(
            status_code=400, content={"error": "Missing videos array"}
        )

    # Validate up front so malformed items are reported as client errors
    # rather than surfacing as a KeyError/TypeError from the processing step.
    for index, item in enumerate(videos_data):
        if (
            not isinstance(item, dict)
            or not item.get("video_url")
            or not item.get("quote_text")
        ):
            return JSONResponse(
                status_code=400,
                content={
                    "error": f"videos[{index}] is missing video_url or quote_text"
                },
            )

    # Extract per-video parameters as parallel lists, one per positional arg
    video_urls = [v["video_url"] for v in videos_data]
    quote_texts = [v["quote_text"] for v in videos_data]
    audio_b64s = [v.get("audio_b64") for v in videos_data]
    text_styles = [v.get("text_style", "classic_center") for v in videos_data]

    try:
        results = list(
            process_quote_video.map(
                video_urls,
                quote_texts,
                audio_b64s,
                text_styles,
            )
        )
    except Exception as e:
        # Top-level boundary: report any processing failure to the client.
        return JSONResponse(status_code=500, content={"error": str(e)})

    encoded_results = [
        {
            "success": True,
            "video": base64.b64encode(video_bytes).decode(),
            "size_mb": len(video_bytes) / 1024 / 1024,
        }
        for video_bytes in results
    ]

    return {
        "success": True,
        "videos": encoded_results,
        "count": len(encoded_results),
    }