# h-xml's picture
# Update app.py
# a1346b2 verified
"""
BabelDocs x Agentic AI MCP - Gradio Application
PDF Translation with Google Drive Integration.
Accepts public GDrive links.
For Anthropic Hackathon - Track 1: Building MCP
Usage:
python app.py
"""
import os
import re
import base64
import tempfile
import httpx
import gradio as gr
from pathlib import Path
from datetime import datetime
from dotenv import load_dotenv
load_dotenv()
# Modal endpoint configuration.
# The base URL is read from the BABELDOCS_MODAL_URL environment variable
# (set it as a HuggingFace Space secret for production). Importing this
# module fails immediately when the variable is missing or empty.
MODAL_BASE_URL = os.environ.get("BABELDOCS_MODAL_URL")
if not MODAL_BASE_URL:
    raise ValueError(
        "BABELDOCS_MODAL_URL environment variable required. "
        "Set it as a HuggingFace Space secret."
    )

# Both endpoint URLs share the base prefix and differ only in their suffix.
MODAL_TRANSLATE_URL = MODAL_BASE_URL + "-babeldocstranslator-api.modal.run"
MODAL_HEALTH_URL = MODAL_BASE_URL + "-babeldocstranslator-health.modal.run"

# Max pages limit (test phase)
MAX_PAGES = 20
# Supported languages: language code -> English display name.
# The codes are what the UI offers as target languages and what is sent
# to the translation endpoint as "target_lang".
LANGUAGES = dict(
    [
        ("en", "English"),
        ("fr", "French"),
        ("es", "Spanish"),
        ("de", "German"),
        ("it", "Italian"),
        ("pt", "Portuguese"),
        ("zh", "Chinese"),
        ("ja", "Japanese"),
        ("ko", "Korean"),
        ("ru", "Russian"),
        ("ar", "Arabic"),
    ]
)
# Sample files for testing.
# Each entry is a (label, public Google Drive share link) pair; the labels
# populate the sample dropdown and the link is looked up by label.
_SAMPLE_LINKS = {
    "French Contract (10 pages)": "https://drive.google.com/file/d/1S9cWP7QkiqltlYJt8o1FpQRLHElZLyYx/view?usp=sharing",
    "Sample Document 2": "https://drive.google.com/file/d/1IaTjmfNRhDsCsTQIo6To1HYVZ1IkQHw3/view?usp=sharing",
    "Sample Document 3": "https://drive.google.com/file/d/1oexokd-auHnGQGvQEu-0NAB8TFI8mnye/view?usp=sharing",
}
SAMPLE_FILES = list(_SAMPLE_LINKS.items())
def log_message(logs: list, message: str) -> list:
    """Append *message* to *logs*, prefixed with the current ``[HH:MM:SS]`` time.

    The list is modified in place and the very same list object is
    returned, so callers may either rebind the result or ignore it.
    """
    stamp = f"{datetime.now():%H:%M:%S}"
    entry = f"[{stamp}] {message}"
    logs.append(entry)
    return logs
def extract_gdrive_file_id(url: str) -> str | None:
"""Extract file ID from Google Drive URL."""
patterns = [
r"/file/d/([a-zA-Z0-9_-]+)",
r"id=([a-zA-Z0-9_-]+)",
r"/d/([a-zA-Z0-9_-]+)",
]
for pattern in patterns:
match = re.search(pattern, url)
if match:
return match.group(1)
return None
async def download_gdrive_public(url: str) -> tuple[bytes, str]:
    """Download a file from a public Google Drive link.

    Args:
        url: Google Drive share link; it must contain a file ID that
            ``extract_gdrive_file_id`` can recognise.

    Returns:
        ``(file_bytes, filename)``. The filename is taken from the
        ``Content-Disposition`` response header when present, otherwise
        it falls back to ``gdrive_<file_id>.pdf``.

    Raises:
        ValueError: If no file ID can be extracted from *url*, or if the
            server answered with an HTML page instead of file content.
        httpx.HTTPStatusError: If the final response has an error status.
    """
    file_id = extract_gdrive_file_id(url)
    if not file_id:
        raise ValueError("Invalid Google Drive URL")

    # Direct download URL
    download_url = f"https://drive.google.com/uc?export=download&id={file_id}"

    async with httpx.AsyncClient(timeout=120.0, follow_redirects=True) as client:
        response = await client.get(download_url)
        response.raise_for_status()

        # A successful status is not enough: an HTML body means we were
        # handed a web page (typically a sign-in or confirmation page for
        # files that are not publicly shared or need an extra download
        # step) rather than the file itself. Fail here with a clear
        # message instead of forwarding the page as if it were a PDF.
        content_type = response.headers.get("Content-Type", "")
        if content_type.lower().startswith("text/html"):
            raise ValueError(
                "Google Drive returned a web page instead of the file. "
                "Make sure the link is shared publicly and can be "
                "downloaded without confirmation."
            )

        # Try to get filename from Content-Disposition header
        content_disp = response.headers.get("Content-Disposition", "")
        filename_match = re.search(r'filename="?([^";\n]+)"?', content_disp)
        # Unquoted values may carry surrounding whitespace; an all-blank
        # value is treated the same as a missing one.
        filename = filename_match.group(1).strip() if filename_match else ""
        if not filename:
            filename = f"gdrive_{file_id}.pdf"

        return response.content, filename
def _save_result_pdf(pdf_base64: str) -> tuple[str, int]:
    """Decode a base64 PDF into a persistent temp file; return (path, size in bytes)."""
    pdf_bytes = base64.b64decode(pdf_base64)
    # delete=False because the path is handed back to the caller and must
    # outlive this function; the context manager guarantees the handle is
    # closed even if the write fails.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
        tmp.write(pdf_bytes)
    return tmp.name, len(pdf_bytes)


async def translate_pdf_modal(
    gdrive_url: str,
    target_lang: str,
    progress=gr.Progress()
) -> tuple:
    """Translate a PDF from Google Drive using the Modal endpoint.

    The PDF is downloaded from the given link, sent base64-encoded to
    ``MODAL_TRANSLATE_URL`` together with the target language, and the
    returned "mono" and/or "dual" PDFs are written to temporary files.

    Args:
        gdrive_url: Public Google Drive link of the source PDF.
        target_lang: Language code sent to the endpoint as ``target_lang``.
        progress: Gradio progress tracker, called with a completion
            fraction and a description at each stage.

    Returns:
        A 5-tuple ``(mono_path, dual_path, status, stats_markdown,
        logs_text)``. On any failure both paths are ``None`` and the
        statistics string is empty; the reason is in ``status`` and/or
        the logs. Regular exceptions are caught and reported this way
        rather than propagated.
    """
    logs = []

    def failure(status: str) -> tuple:
        # Uniform shape for every unsuccessful outcome.
        return None, None, status, "", "\n".join(logs)

    # Validate input
    if not gdrive_url or not gdrive_url.strip():
        return failure("Please provide a Google Drive link or select a sample file")

    try:
        log_message(logs, "Starting translation...")

        # Get PDF bytes and filename from Google Drive
        log_message(logs, "Downloading from Google Drive...")
        progress(0.05, desc="Downloading from Google Drive...")
        pdf_bytes, source_filename = await download_gdrive_public(gdrive_url.strip())
        log_message(logs, f"Downloaded: {source_filename}")

        pdf_base64 = base64.b64encode(pdf_bytes).decode("utf-8")
        log_message(logs, f"Input: {source_filename}")
        log_message(logs, f"Size: {len(pdf_bytes) / 1024:.1f} KB")
        log_message(logs, f"Target: {LANGUAGES.get(target_lang, target_lang)}")

        progress(0.1, desc="Uploading to Modal...")
        payload = {
            "pdf_base64": pdf_base64,
            "target_lang": target_lang,
        }

        log_message(logs, "Translating on Modal cloud...")
        log_message(logs, "(This may take several minutes)")
        progress(0.2, desc="Translating...")

        start_time = datetime.now()
        # Long timeout: the whole translation happens inside this one request.
        async with httpx.AsyncClient(timeout=900.0, follow_redirects=True) as client:
            response = await client.post(MODAL_TRANSLATE_URL, json=payload)
            response.raise_for_status()
            result = response.json()
        duration = (datetime.now() - start_time).total_seconds()

        progress(0.8, desc="Processing result...")
        if not result.get("success"):
            error_msg = result.get("message", "Unknown error")
            log_message(logs, f"ERROR: {error_msg}")
            return failure("Translation failed")

        # The descriptive names below are only used in the log lines; the
        # files themselves get generated temp names.
        stem = Path(source_filename).stem

        # Process mono_img PDF
        mono_img_path = None
        mono_img_base64 = result.get("mono_img_pdf_base64")
        if mono_img_base64:
            mono_img_path, mono_size = _save_result_pdf(mono_img_base64)
            mono_img_filename = f"{stem}_translated.{target_lang}.pdf"
            log_message(logs, f"Mono: {mono_img_filename} ({mono_size / 1024:.1f} KB)")

        # Process dual_img PDF
        dual_img_path = None
        dual_img_base64 = result.get("dual_img_pdf_base64")
        if dual_img_base64:
            dual_img_path, dual_size = _save_result_pdf(dual_img_base64)
            dual_img_filename = f"{stem}_translated.{target_lang}.dual.pdf"
            log_message(logs, f"Dual: {dual_img_filename} ({dual_size / 1024:.1f} KB)")

        if not mono_img_path and not dual_img_path:
            log_message(logs, "ERROR: No output PDF in response")
            return failure("Translation failed")

        log_message(logs, f"Duration: {duration:.1f} seconds")
        stats_msg = (
            "**Translation completed!**\n"
            f"- **Duration:** {duration:.1f} seconds\n"
            f"- **Target:** {LANGUAGES.get(target_lang, target_lang)}"
        )

        progress(1.0, desc="Done!")
        return mono_img_path, dual_img_path, "Translation successful!", stats_msg, "\n".join(logs)

    except httpx.TimeoutException:
        log_message(logs, "ERROR: Translation timed out")
        return failure("Translation timed out")
    except httpx.HTTPStatusError as e:
        log_message(logs, f"ERROR: HTTP {e.response.status_code}")
        return failure(f"HTTP error: {e.response.status_code}")
    except Exception as e:
        # Top-level UI boundary: surface any other error to the user
        # through the status and log fields instead of crashing the handler.
        log_message(logs, f"ERROR: {str(e)}")
        return failure(f"Error: {str(e)}")
def load_sample(sample_name: str) -> str:
    """Return the URL of the sample labelled *sample_name*.

    The first entry of ``SAMPLE_FILES`` whose label equals *sample_name*
    wins; an empty string is returned when no label matches.
    """
    matching_urls = (link for label, link in SAMPLE_FILES if label == sample_name)
    return next(matching_urls, "")
# Gradio Interface
# Two-column layout: inputs on the left, results on the right, with an
# explanatory footer below. Event handlers are registered at the end.
# NOTE(review): the nesting of the logs box (inside the result column) is
# reconstructed — confirm it matches the intended layout.
with gr.Blocks(title="BabelDocs x Agentic AI MCP") as demo:
    # Blank lines around "---" keep it a horizontal rule; directly under a
    # text line Markdown would read it as a heading underline instead.
    gr.Markdown("""
# BabelDocs x Agentic AI MCP - PDF Translation with Layout Preservation

**Translate PDFs from Google Drive while preserving the original layout**

---

## Full Google Drive Workflow in Claude Desktop MCP

```
"Translate my Q3 report to French and save it to Translations folder"
Claude searches → downloads → translates → uploads → done!
```

---
""")

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### Input")
            gr.Markdown("**Try a sample file:**")
            sample_dropdown = gr.Dropdown(
                choices=[name for name, _ in SAMPLE_FILES],
                label="Sample Files",
                info="Select a sample PDF to test the translation",
            )
            gr.Markdown("**Or paste your own Google Drive link:**")
            gdrive_url = gr.Textbox(
                label="Google Drive Link (public)",
                placeholder="https://drive.google.com/file/d/...",
                info="Paste a public Google Drive link",
            )
            # (display name, value) pairs: the user sees "English" while
            # the handler still receives the language code "en".
            target_lang = gr.Dropdown(
                choices=[(name, code) for code, name in LANGUAGES.items()],
                value="en",
                label="Target Language",
            )
            translate_btn = gr.Button(
                "Translate PDF",
                variant="primary",
                size="lg",
            )

        with gr.Column(scale=1):
            gr.Markdown("### Result")
            status_output = gr.Textbox(
                label="Status",
                interactive=False,
            )
            stats_output = gr.Markdown(label="Statistics")
            gr.Markdown("**Downloads:**")
            with gr.Row():
                mono_img_output = gr.File(label="Mono (translated + images)")
                dual_img_output = gr.File(label="Dual (bilingual + images)")
            logs_output = gr.Textbox(
                label="Logs",
                interactive=False,
                lines=10,
                max_lines=15,
            )

    # The page limit shown here is taken from MAX_PAGES so the text cannot
    # drift from the constant. This file does not enforce the limit itself.
    gr.Markdown(f"""
---

### How it works

```
1. Paste Google Drive link (or select sample)
2. Send to Modal cloud (serverless)
3. BabelDOC translates text + images, preserves layout
4. Download translated PDF
```

### Test Phase Limits

- **Maximum {MAX_PAGES} pages per PDF** (to prevent token abuse)
- Oversized documents will be rejected automatically

---

**Built with:** BabelDOC, Modal, Nebius AI, Gradio | **Hackathon:** Anthropic MCP Track 1
""")

    # Load sample URL when selected
    sample_dropdown.change(
        fn=load_sample,
        inputs=[sample_dropdown],
        outputs=[gdrive_url],
    )

    # Output order must match the 5-tuple returned by translate_pdf_modal.
    translate_btn.click(
        fn=translate_pdf_modal,
        inputs=[gdrive_url, target_lang],
        outputs=[mono_img_output, dual_img_output, status_output, stats_output, logs_output],
    )
if __name__ == "__main__":
    # Start the Gradio server only when run as a script
    # (``python app.py``), not when the module is imported.
    launch_options = {
        "server_name": "0.0.0.0",  # listen on all network interfaces
        "server_port": 7860,
        "share": False,
        "ssr_mode": False,
    }
    demo.launch(**launch_options)