"""
BabelDOC with Agentic AI - Modal Deployment

PDF translation API with layout preservation.
20-page limit during test phase.

Setup:
    modal secret create babeldocs-secrets \
        NEBIUS_API_KEY=your_key \
        NEBIUS_API_BASE=https://api.tokenfactory.nebius.com/v1/ \
        NEBIUS_TRANSLATION_MODEL=openai/gpt-oss-120b

    Optionally add NEBIUS_VISION_MODEL to the secret; when it is not set,
    Qwen/Qwen2.5-VL-72B-Instruct is used.

Deploy:
    modal deploy modal_deploy.py
"""

import base64
import os
import subprocess
import tempfile
import time
from pathlib import Path

import modal

# Directory containing this deployment script.
THIS_DIR = Path(__file__).parent.resolve()
# BabelDOC checkout, expected as a sibling of this script's directory; it is
# copied into the container image below.
BABELDOC_DIR = THIS_DIR.parent / "BabelDOC"

# Maximum number of pages accepted per PDF (test phase limit).
MAX_PAGES = 20

# Modal application that owns the translator class and the local entrypoint.
app = modal.App("mcp1stann-babeldocs")

# Container image: Debian slim + Python 3.11, system libraries, uv, FastAPI,
# and an editable install of the local BabelDOC checkout.
babeldocs_image = (
    modal.Image.debian_slim(python_version="3.11")
    .apt_install(
        "git",
        # NOTE(review): libgl1-mesa-glx is a transitional package that newer
        # Debian releases may no longer ship; if the image build fails on this
        # name, "libgl1" is the usual replacement - confirm against the base image.
        "libgl1-mesa-glx",
        "libglib2.0-0",
        "libsm6",
        "libxext6",
        "libxrender-dev",
        "libgomp1",
        "curl",
        "libspatialindex-dev",
        "libharfbuzz-dev",
        "libfreetype6-dev",
        "libopencv-dev",
        "libzstd-dev",
    )
    .pip_install("uv")
    .env({
        "PYTHONIOENCODING": "utf-8",
        "PYTHONUNBUFFERED": "1",
        # Lets uv install into the system interpreter instead of a virtualenv.
        "UV_SYSTEM_PYTHON": "1",
    })
    .pip_install("fastapi[standard]")
    # copy=True so the checkout is part of the image before the install step
    # below runs against it.
    .add_local_dir(
        str(BABELDOC_DIR),
        remote_path="/app/BabelDOC",
        copy=True,
    )
    .run_commands(
        "cd /app/BabelDOC && uv pip install -e . --python python3.11",
    )
)

# Persistent volume mounted at CACHE_PATH in the translator container; the
# babeldoc subprocess is started with HF_HOME pointing at this path.
cache_volume = modal.Volume.from_name("babeldocs-cache", create_if_missing=True)
CACHE_PATH = "/cache"

@app.cls(
    image=babeldocs_image,
    timeout=900,
    memory=8192,
    cpu=4,
    volumes={CACHE_PATH: cache_volume},
    secrets=[modal.Secret.from_name("babeldocs-secrets")],
    scaledown_window=300,
)
class BabelDocsTranslator:
    """Translate PDFs by running the ``babeldoc`` CLI inside the Modal container.

    Exposes the same translation logic as a Modal method (``translate``) and
    as a POST web endpoint (``api``), plus two informational GET endpoints
    (``health`` and ``languages``).
    """

    def _count_pdf_pages(self, pdf_bytes: bytes) -> int:
        """Return the page count of ``pdf_bytes``, or -1 if it cannot be read.

        Any failure (PyMuPDF missing, data not parseable as a PDF) is reported
        as -1 rather than raised.
        """
        try:
            # PyMuPDF; imported inside the try so an import failure also maps to -1.
            import fitz

            # Context manager closes the document even if len() raises.
            with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
                return len(doc)
        except Exception:
            return -1

    def _translate_internal(
        self,
        pdf_base64: str,
        target_lang: str = "fr",
        pages: str = "",
        no_dual: bool = False,
        no_mono: bool = False,
    ) -> dict:
        """Run one translation and package the generated PDFs.

        Args:
            pdf_base64: Base64-encoded source PDF.
            target_lang: Value passed to ``babeldoc --lang-out``.
            pages: Optional page selection passed to ``--pages``; when given,
                ``--only-include-translated-page`` is added as well.
            no_dual: Skip the dual (bilingual) output.
            no_mono: Skip the mono (translated-only) output.

        Returns:
            On success ``{"success": True, "stats": {...}}`` plus, for each
            produced variant (``mono``, ``mono_img``, ``dual``, ``dual_img``),
            the keys ``<variant>_pdf_base64`` and ``<variant>_filename`` and a
            ``<variant>_size_bytes`` entry in ``stats``. On failure
            ``{"success": False, "message": ...}``; a failed babeldoc run also
            carries truncated ``stderr`` / ``stdout``. Never raises: unexpected
            exceptions are converted into a failure dict.
        """
        try:
            if not pdf_base64:
                return {"success": False, "message": "No PDF provided"}

            pdf_bytes = base64.b64decode(pdf_base64)

            page_count = self._count_pdf_pages(pdf_bytes)
            if page_count < 0:
                # -1 means the PDF could not be opened. Without this guard the
                # "-1 > MAX_PAGES" comparison below is False and an unreadable
                # file would skip the page limit entirely.
                return {
                    "success": False,
                    "message": "Could not read PDF (invalid or corrupted file)",
                }
            if page_count > MAX_PAGES:
                return {
                    "success": False,
                    "message": f"PDF has {page_count} pages. Maximum allowed: {MAX_PAGES} pages (test phase limit)."
                }

            api_key = os.getenv("NEBIUS_API_KEY", "")

            def scrub(text, limit):
                """Truncate subprocess output after masking the API key in it."""
                text = text or ""
                if api_key:
                    # Mask before truncating so a key cut at the boundary
                    # cannot leak a usable prefix.
                    text = text.replace(api_key, "***")
                return text[:limit]

            with tempfile.TemporaryDirectory() as tmpdir:
                input_path = Path(tmpdir) / "input.pdf"
                output_dir = Path(tmpdir) / "output"
                output_dir.mkdir()
                input_path.write_bytes(pdf_bytes)

                cmd = [
                    "babeldoc",
                    "--files", str(input_path),
                    "--output", str(output_dir),
                    "--lang-out", target_lang,
                    "--openai",
                    "--openai-model", os.getenv("NEBIUS_TRANSLATION_MODEL", "openai/gpt-oss-120b"),
                    "--openai-base-url", os.getenv("NEBIUS_API_BASE", "https://api.tokenfactory.nebius.com/v1/"),
                    "--openai-api-key", api_key,
                    "--no-watermark",
                    "--translate-table-text",
                    "--enhance-compatibility",
                    "--vision-model", os.getenv("NEBIUS_VISION_MODEL", "Qwen/Qwen2.5-VL-72B-Instruct"),
                ]
                if pages:
                    cmd.extend(["--pages", pages])
                    cmd.append("--only-include-translated-page")
                if no_dual:
                    cmd.append("--no-dual")
                if no_mono:
                    cmd.append("--no-mono")

                # perf_counter is monotonic, so the reported duration is not
                # affected by wall-clock adjustments.
                started = time.perf_counter()
                result = subprocess.run(
                    cmd,
                    capture_output=True,
                    text=True,
                    encoding="utf-8",
                    errors="replace",
                    cwd="/app/BabelDOC",
                    env={
                        **os.environ,
                        "HF_HOME": CACHE_PATH,
                    },
                )
                duration = time.perf_counter() - started

                if result.returncode != 0:
                    return {
                        "success": False,
                        "message": "Translation failed",
                        "stderr": scrub(result.stderr, 1000),
                        "stdout": scrub(result.stdout, 500),
                    }

                # Sorted so that "first match" and the fallback below are
                # deterministic; glob order is otherwise unspecified.
                all_pdfs = sorted(output_dir.glob("*.pdf"))

                # (result-key prefix, filename marker after ".<lang>.", skip flag)
                variants = (
                    ("mono", "mono.pdf", no_mono),
                    ("mono_img", "mono.images_translated.pdf", no_mono),
                    ("dual", "dual.pdf", no_dual),
                    ("dual_img", "dual.images_translated.pdf", no_dual),
                )

                found = {}
                for key, marker, _ in variants:
                    needle = f".{target_lang}.{marker}"
                    is_image_variant = key.endswith("_img")
                    found[key] = next(
                        (
                            p for p in all_pdfs
                            if needle in p.name
                            and (is_image_variant or "images_translated" not in p.name)
                        ),
                        None,
                    )

                if not any(found.values()):
                    if not all_pdfs:
                        return {"success": False, "message": "No output PDF generated"}
                    # Unexpected naming: treat the first PDF as the mono output.
                    # NOTE(review): with no_mono=True this fallback file is
                    # still skipped below, yielding success without any PDF.
                    found["mono"] = all_pdfs[0]

                result_data = {
                    "success": True,
                    "stats": {
                        "duration_seconds": round(duration, 2),
                    }
                }

                for key, _, skipped in variants:
                    pdf_path = found[key]
                    if pdf_path is None or skipped:
                        continue
                    payload = pdf_path.read_bytes()
                    result_data[f"{key}_pdf_base64"] = base64.b64encode(payload).decode("utf-8")
                    result_data[f"{key}_filename"] = pdf_path.name
                    result_data["stats"][f"{key}_size_bytes"] = len(payload)

                return result_data

        except Exception as e:
            # Boundary handler: callers always receive a dict, never an exception.
            return {"success": False, "message": f"Error: {str(e)}"}

    @modal.method()
    def translate(
        self,
        pdf_base64: str,
        target_lang: str = "fr",
        pages: str = "",
        no_dual: bool = False,
        no_mono: bool = False,
    ) -> dict:
        """Translate a PDF; Modal-callable wrapper around ``_translate_internal``."""
        return self._translate_internal(pdf_base64, target_lang, pages, no_dual, no_mono)

    @modal.fastapi_endpoint(method="POST")
    def api(self, request: dict) -> dict:
        """
        FastAPI endpoint POST for PDF translation.

        Request body:
        {
            "pdf_base64": "base64_encoded_pdf",
            "target_lang": "fr",
            "pages": "1,2,3" (optional),
            "no_dual": false,
            "no_mono": false
        }

        Missing keys fall back to the defaults shown above (empty string for
        "pdf_base64" and "pages"). Returns the dict produced by
        ``_translate_internal``.
        """
        pdf_base64 = request.get("pdf_base64", "")
        target_lang = request.get("target_lang", "fr")
        pages = request.get("pages", "")
        no_dual = request.get("no_dual", False)
        no_mono = request.get("no_mono", False)

        return self._translate_internal(pdf_base64, target_lang, pages, no_dual, no_mono)

    @modal.fastapi_endpoint(method="GET")
    def health(self) -> dict:
        """Health check endpoint; also reports the current page limit."""
        return {
            "status": "healthy",
            "service": "BabelDOC with Agentic AI",
            "version": "1.0.0",
            "max_pages": MAX_PAGES,
        }

    @modal.fastapi_endpoint(method="GET")
    def languages(self) -> dict:
        """Return the advertised language codes mapped to their English names."""
        return {
            "languages": {
                "fr": "French",
                "en": "English",
                "es": "Spanish",
                "de": "German",
                "it": "Italian",
                "pt": "Portuguese",
                "zh": "Chinese",
                "ja": "Japanese",
                "ko": "Korean",
                "ru": "Russian",
                "ar": "Arabic",
            }
        }


@app.local_entrypoint()
def main():
    """Local entrypoint: print a short banner with the page limit and usage hints."""
    print("BabelDOC with Agentic AI - Modal Deployment")
    print("=" * 45)
    print(f"Max pages: {MAX_PAGES} (test phase)")
    print()
    print("Deploy: modal deploy modal_deploy.py")
    print("Test: modal serve modal_deploy.py")