Spaces:
Runtime error
Runtime error
| import logging | |
| import os | |
| import re | |
| import shutil | |
| import stat | |
| import subprocess | |
| import time | |
| import uuid | |
| from contextlib import contextmanager | |
| from pathlib import Path | |
| import gradio as gr | |
| import requests | |
| from tm import create_tm | |
| logging.basicConfig(format="%(asctime)s - %(message)s", level=logging.INFO) | |
| GITHUB_TOKEN = os.getenv("GITHUB_TOKEN") | |
| ALIGNER_SCRIPT_DIR = Path("./tibetan-aligner").resolve() | |
| ALIGNER_SCRIPT_NAME = "align_tib_en.sh" | |
| ALIGNER_SCRIPT_PATH = ALIGNER_SCRIPT_DIR / ALIGNER_SCRIPT_NAME | |
| assert ALIGNER_SCRIPT_PATH.is_file() | |
| def make_dir_executable(dir_path: Path): | |
| for fn in dir_path.iterdir(): | |
| st = os.stat(fn) | |
| os.chmod(fn, st.st_mode | stat.S_IEXEC) | |
| st = os.stat(fn) | |
| os.chmod(fn, st.st_mode | stat.S_IXGRP) | |
| st = os.stat(fn) | |
| os.chmod(fn, st.st_mode | stat.S_IXOTH) | |
| make_dir_executable(ALIGNER_SCRIPT_DIR) | |
| def TemporaryDirectory(): | |
| tmpdir = Path("./output").resolve() / uuid.uuid4().hex[:8] | |
| tmpdir.mkdir(exist_ok=True, parents=True) | |
| try: | |
| yield tmpdir | |
| finally: | |
| shutil.rmtree(str(tmpdir)) | |
| def download_file(github_file_url: str, output_fn) -> Path: | |
| """Download file from github""" | |
| headers = { | |
| "Authorization": f"token {GITHUB_TOKEN}", | |
| "Accept": "application/vnd.github+json", | |
| } | |
| authenticated_file_url = f"{github_file_url}?token={GITHUB_TOKEN}" | |
| with requests.get(authenticated_file_url, headers=headers, stream=True) as r: | |
| r.raise_for_status() | |
| with open(output_fn, "wb") as f: | |
| for chunk in r.iter_content(chunk_size=8192): | |
| f.write(chunk) | |
| return output_fn | |
| def _run_align_script(bo_fn, en_fn, output_dir): | |
| start = time.time() | |
| cmd = [str(ALIGNER_SCRIPT_PATH), str(bo_fn), str(en_fn), str(output_dir)] | |
| output = subprocess.run( | |
| cmd, | |
| check=True, | |
| capture_output=True, | |
| text=True, | |
| cwd=str(ALIGNER_SCRIPT_DIR), | |
| ) | |
| output_fn = re.search(r"\[OUTPUT\] (.*)", output.stdout).group(1) | |
| output_fn = "/" + output_fn.split("//")[-1] | |
| end = time.time() | |
| total_time = round((end - start) / 60, 2) | |
| logging.info(f"Total time taken for Aligning: {total_time} mins") | |
| return output_fn | |
| def align(text_pair): | |
| logging.info(f"Running aligner for TM{text_pair['text_id']}...") | |
| with TemporaryDirectory() as tmpdir: | |
| output_dir = Path(tmpdir) | |
| bo_fn = download_file(text_pair["bo_file_url"], output_fn=output_dir / "bo.tx") | |
| en_fn = download_file(text_pair["en_file_url"], output_fn=output_dir / "en.tx") | |
| aligned_fn = _run_align_script(bo_fn, en_fn, output_dir) | |
| repo_url = create_tm(aligned_fn, text_pair=text_pair) | |
| return {"tm_repo_url": repo_url} | |
| with gr.Blocks() as demo: | |
| gr.Markdown("## Tibetan-English Aligner API") | |
| gr.Markdown("Please use Via API") | |
| input = gr.JSON( | |
| # value={ | |
| # "text_id": f"{uuid.uuid4().hex[:4]}", | |
| # "bo_file_url": "https://raw.githubusercontent.com/OpenPecha/tibetan-aligner/main/tests/data/text-bo.txt", | |
| # "en_file_url": "https://raw.githubusercontent.com/OpenPecha/tibetan-aligner/main/tests/data/text-en.txt", | |
| # } | |
| ) | |
| output = gr.JSON() | |
| align_btn = gr.Button("Align") | |
| align_btn.click( | |
| fn=align, | |
| inputs=input, | |
| outputs=output, | |
| api_name="align", | |
| ) | |
| if __name__ == "__main__": | |
| demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True, debug=True) | |