gdrive-pdf-translator-mcp / modal_deploy.py
h-xml's picture
Upload 6 files
92a0b42 verified
"""
BabelDOC with Agentic AI - Modal Deployment

PDF translation API with layout preservation.
PDFs are limited to 20 pages during the test phase.

Setup:
    modal secret create babeldocs-secrets \
        NEBIUS_API_KEY=your_key \
        NEBIUS_API_BASE=https://api.tokenfactory.nebius.com/v1/ \
        NEBIUS_TRANSLATION_MODEL=openai/gpt-oss-120b

    Optionally add NEBIUS_VISION_MODEL to the secret to override the vision
    model used for image translation (default: Qwen/Qwen2.5-VL-72B-Instruct).

Deploy:
    modal deploy modal_deploy.py
"""
import modal
import os
from pathlib import Path
# Directory that contains this deployment script.
THIS_DIR = Path(__file__).parent.resolve()
# Local BabelDOC checkout, looked up in the parent of this script's directory.
BABELDOC_DIR = THIS_DIR.parent / "BabelDOC"
# Max pages allowed (test phase limit); PDFs with more pages are rejected
# before translation starts.
MAX_PAGES = 20
# Modal app - custom name for hackathon
app = modal.App("mcp1stann-babeldocs")
# Container image: Debian slim with Python 3.11, the system libraries listed
# below, uv, FastAPI, and an editable install of the local BabelDOC checkout.
_SYSTEM_PACKAGES = (
    "git",
    "libgl1-mesa-glx",
    "libglib2.0-0",
    "libsm6",
    "libxext6",
    "libxrender-dev",
    "libgomp1",
    "curl",
    "libspatialindex-dev",  # For rtree
    "libharfbuzz-dev",  # For uharfbuzz
    "libfreetype6-dev",  # For freetype-py
    "libopencv-dev",  # For opencv dependencies
    "libzstd-dev",  # For pyzstd
)

_IMAGE_ENV = {
    "PYTHONIOENCODING": "utf-8",
    "PYTHONUNBUFFERED": "1",
    "UV_SYSTEM_PYTHON": "1",
}

# Step 1: base OS layer with the native dependencies.
_os_layer = modal.Image.debian_slim(python_version="3.11").apt_install(
    *_SYSTEM_PACKAGES
)

# Step 2: Python tooling and the web framework used by the HTTP endpoints.
_tooling_layer = (
    _os_layer.pip_install("uv").env(_IMAGE_ENV).pip_install("fastapi[standard]")
)

# Step 3: copy the BabelDOC sources into the image (copy=True so the files
# exist at build time for the install command that follows).
_sources_layer = _tooling_layer.add_local_dir(
    str(BABELDOC_DIR),
    remote_path="/app/BabelDOC",
    copy=True,
)

# Step 4: install BabelDOC in editable mode from the copied sources.
babeldocs_image = _sources_layer.run_commands(
    "cd /app/BabelDOC && uv pip install -e . --python python3.11",
)
# Volume for caching models and fonts; looked up by name and created on first
# use if it does not exist yet.
cache_volume = modal.Volume.from_name("babeldocs-cache", create_if_missing=True)
# Path inside the container where the cache volume is mounted.
CACHE_PATH = "/cache"
@app.cls(
    image=babeldocs_image,
    timeout=900,  # 15 minutes
    memory=8192,
    cpu=4,
    volumes={CACHE_PATH: cache_volume},
    secrets=[modal.Secret.from_name("babeldocs-secrets")],
    scaledown_window=300,  # Keep warm for 5 minutes
)
class BabelDocsTranslator:
    """Class-based translator for BabelDOC (based on working SVG generator pattern).

    The translation is exposed both as a Modal method (``translate``) and as
    HTTP endpoints (``api``, ``health``, ``languages``). The actual work is
    done by running the ``babeldoc`` command-line tool in a subprocess.
    """

    def _count_pdf_pages(self, pdf_bytes: bytes) -> int:
        """Count pages in a PDF using PyMuPDF.

        Args:
            pdf_bytes: Raw PDF file content.

        Returns:
            The page count, or -1 when it cannot be determined (PyMuPDF is
            missing or the data cannot be opened).
        """
        try:
            import fitz  # PyMuPDF

            # The context manager closes the document even if len() raises.
            with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
                return len(doc)
        except Exception:
            # Best effort: an unknown count is reported as -1 instead of
            # aborting the request.
            return -1  # Unknown

    def _redact_secret(self, text: str, secret: str) -> str:
        """Return ``text`` with every occurrence of ``secret`` masked."""
        if not text:
            return ""
        if not secret:
            return text
        return text.replace(secret, "***")

    def _translate_internal(
        self,
        pdf_base64: str,
        target_lang: str = "fr",
        pages: str = "",
        no_dual: bool = False,
        no_mono: bool = False,
    ) -> dict:
        """BabelDOC with Agentic AI - Internal translation.

        Decodes the PDF, enforces the page limit, runs ``babeldoc`` in a
        temporary directory and returns the generated PDFs base64-encoded.

        Args:
            pdf_base64: Base64-encoded input PDF.
            target_lang: Language code passed to ``--lang-out``.
            pages: Optional page selection passed to ``--pages``; when set,
                only the translated pages are included in the output.
            no_dual: Skip the dual (bilingual) outputs.
            no_mono: Skip the mono (translated-only) outputs.

        Returns:
            On success, a dict with ``success: True``, a ``stats`` dict and,
            for each available output (``mono``, ``mono_img``, ``dual``,
            ``dual_img``), the keys ``<name>_pdf_base64``, ``<name>_filename``
            and ``stats["<name>_size_bytes"]``. On failure, a dict with
            ``success: False`` and a ``message`` (plus truncated ``stderr`` /
            ``stdout`` when the subprocess failed). This method does not
            raise; unexpected errors are reported in the returned dict.
        """
        import base64
        import subprocess
        import tempfile
        import time
        from pathlib import Path

        try:
            if not pdf_base64:
                return {"success": False, "message": "No PDF provided"}

            try:
                pdf_bytes = base64.b64decode(pdf_base64)
            except (ValueError, TypeError) as exc:
                # binascii.Error (bad padding) is a ValueError subclass.
                return {
                    "success": False,
                    "message": f"Invalid base64 PDF data: {exc}",
                }

            # Check page limit (test phase). An unknown count (-1) passes this
            # check; unreadable input is left for babeldoc itself to reject.
            page_count = self._count_pdf_pages(pdf_bytes)
            if page_count > MAX_PAGES:
                return {
                    "success": False,
                    "message": f"PDF has {page_count} pages. Maximum allowed: {MAX_PAGES} pages (test phase limit).",
                }

            api_key = os.getenv("NEBIUS_API_KEY", "")

            with tempfile.TemporaryDirectory() as tmpdir:
                input_path = Path(tmpdir) / "input.pdf"
                output_dir = Path(tmpdir) / "output"
                output_dir.mkdir()
                input_path.write_bytes(pdf_bytes)

                cmd = [
                    "babeldoc",
                    "--files", str(input_path),
                    "--output", str(output_dir),
                    "--lang-out", target_lang,
                    "--openai",
                    "--openai-model", os.getenv("NEBIUS_TRANSLATION_MODEL", "openai/gpt-oss-120b"),
                    "--openai-base-url", os.getenv("NEBIUS_API_BASE", "https://api.tokenfactory.nebius.com/v1/"),
                    "--openai-api-key", api_key,
                    "--no-watermark",
                    "--translate-table-text",
                    "--enhance-compatibility",
                    # Enable image translation (orchestration PASS 2) with vision model
                    "--vision-model", os.getenv("NEBIUS_VISION_MODEL", "Qwen/Qwen2.5-VL-72B-Instruct"),
                ]
                if pages:
                    # str() so a numeric page selection from JSON does not
                    # make subprocess.run() fail with a TypeError.
                    cmd.extend(["--pages", str(pages)])
                    cmd.append("--only-include-translated-page")
                if no_dual:
                    cmd.append("--no-dual")
                if no_mono:
                    cmd.append("--no-mono")

                # Monotonic clock: unaffected by system clock adjustments.
                started = time.monotonic()
                result = subprocess.run(
                    cmd,
                    capture_output=True,
                    text=True,
                    encoding="utf-8",
                    errors="replace",
                    cwd="/app/BabelDOC",
                    env={
                        **os.environ,
                        # Keep Hugging Face downloads on the mounted cache volume.
                        "HF_HOME": CACHE_PATH,
                    },
                )
                duration = time.monotonic() - started

                if result.returncode != 0:
                    # The API key is on the command line, so mask it before
                    # echoing subprocess output back to the caller. Redact
                    # first, then truncate, so no partial key survives.
                    return {
                        "success": False,
                        "message": "Translation failed",
                        "stderr": self._redact_secret(result.stderr, api_key)[:1000],
                        "stdout": self._redact_secret(result.stdout, api_key)[:500],
                    }

                # Find all 4 types of PDFs:
                # Format: name.no_watermark.{lang}.{mono|dual}.pdf
                # Format: name.no_watermark.{lang}.{mono|dual}.images_translated.pdf
                # Sorted so the "first match" choice is deterministic.
                all_pdfs = sorted(output_dir.glob("*.pdf"))
                if not all_pdfs:
                    return {"success": False, "message": "No output PDF generated"}

                without_images = [p for p in all_pdfs if "images_translated" not in p.name]

                def first_with(candidates, marker):
                    """Return the first candidate whose name contains marker."""
                    return next((p for p in candidates if marker in p.name), None)

                found = {
                    "mono": first_with(without_images, f".{target_lang}.mono.pdf"),
                    "mono_img": first_with(all_pdfs, f".{target_lang}.mono.images_translated.pdf"),
                    "dual": first_with(without_images, f".{target_lang}.dual.pdf"),
                    "dual_img": first_with(all_pdfs, f".{target_lang}.dual.images_translated.pdf"),
                }

                if not any(found.values()):
                    # Fallback to any PDF. Put it in the first slot the caller
                    # has not disabled, so it is not dropped when no_mono is set.
                    if not no_mono:
                        found["mono"] = all_pdfs[0]
                    elif not no_dual:
                        found["dual"] = all_pdfs[0]

                result_data = {
                    "success": True,
                    "stats": {
                        "duration_seconds": round(duration, 2),
                    },
                }

                # Slot order matches the documented output order.
                requested = []
                if not no_mono:
                    requested += ["mono", "mono_img"]
                if not no_dual:
                    requested += ["dual", "dual_img"]

                attached = 0
                for slot in ("mono", "mono_img", "dual", "dual_img"):
                    pdf_path = found[slot]
                    if pdf_path is None or slot not in requested:
                        continue
                    pdf_data = pdf_path.read_bytes()
                    result_data[f"{slot}_pdf_base64"] = base64.b64encode(pdf_data).decode("utf-8")
                    result_data[f"{slot}_filename"] = pdf_path.name
                    result_data["stats"][f"{slot}_size_bytes"] = len(pdf_data)
                    attached += 1

                if not attached:
                    # Never report success without a document in the payload.
                    return {
                        "success": False,
                        "message": "No output PDF available for the requested mono/dual selection",
                    }

                return result_data
        except Exception as e:
            # Top-level boundary: report the failure to the caller instead of
            # letting the endpoint raise.
            return {"success": False, "message": f"Error: {str(e)}"}

    @modal.method()
    def translate(
        self,
        pdf_base64: str,
        target_lang: str = "fr",
        pages: str = "",
        no_dual: bool = False,
        no_mono: bool = False,
    ) -> dict:
        """Translate method (callable via Modal).

        Thin wrapper around ``_translate_internal``; see that method for the
        meaning of the arguments and the shape of the returned dict.
        """
        return self._translate_internal(pdf_base64, target_lang, pages, no_dual, no_mono)

    @modal.fastapi_endpoint(method="POST")
    def api(self, request: dict) -> dict:
        """
        FastAPI endpoint POST for PDF translation.

        Request body:
        {
            "pdf_base64": "base64_encoded_pdf",
            "target_lang": "fr",
            "pages": "1,2,3" (optional),
            "no_dual": false,
            "no_mono": false
        }

        Missing or null fields fall back to their defaults. Returns the same
        dict as ``_translate_internal``.
        """
        # `or` rather than a .get() default so an explicit JSON null also
        # falls back to the default value.
        pdf_base64 = request.get("pdf_base64") or ""
        target_lang = request.get("target_lang") or "fr"
        pages = request.get("pages") or ""
        no_dual = bool(request.get("no_dual", False))
        no_mono = bool(request.get("no_mono", False))
        return self._translate_internal(pdf_base64, target_lang, pages, no_dual, no_mono)

    @modal.fastapi_endpoint(method="GET")
    def health(self) -> dict:
        """Health check endpoint; reports static service info and the page limit."""
        return {
            "status": "healthy",
            "service": "BabelDOC with Agentic AI",
            "version": "1.0.0",
            "max_pages": MAX_PAGES,
        }

    @modal.fastapi_endpoint(method="GET")
    def languages(self) -> dict:
        """Get supported languages as a mapping of language code to name."""
        return {
            "languages": {
                "fr": "French",
                "en": "English",
                "es": "Spanish",
                "de": "German",
                "it": "Italian",
                "pt": "Portuguese",
                "zh": "Chinese",
                "ja": "Japanese",
                "ko": "Korean",
                "ru": "Russian",
                "ar": "Arabic",
            }
        }
@app.local_entrypoint()
def main():
    """Print a short banner with the page limit and the deploy/serve commands."""
    banner_lines = (
        "BabelDOC with Agentic AI - Modal Deployment",
        "=" * 45,
        f"Max pages: {MAX_PAGES} (test phase)",
        "",
        "Deploy: modal deploy modal_deploy.py",
        "Test: modal serve modal_deploy.py",
    )
    for line in banner_lines:
        print(line)