#@title download model /content/omniasr-transcriptions/server/download_models.sh
# %%writefile /content/omniasr-transcriptions/server/download_models.py
#!/usr/bin/env python3
"""
download_models.py

Ensures the MMS model files are downloaded into MODELS_DIR.

MODELS_DIR is read from the environment and defaults to ``./models``.
Files that already exist with a non-zero size are left untouched.
"""

import contextlib
import http.client
import os
import sys
import urllib.error
import urllib.request

try:
    from tqdm.auto import tqdm
except ImportError:  # tqdm is only cosmetic; keep the script usable without it

    class tqdm:
        """Silent stand-in exposing the small part of the tqdm API used here."""

        def __init__(self, *args, **kwargs):
            pass

        def __enter__(self):
            return self

        def __exit__(self, *exc_info):
            return False

        def update(self, n=1):
            pass

        @staticmethod
        def write(message):
            print(message)


# Number of bytes read from the network per iteration.
_CHUNK_SIZE = 64 * 1024


def download_file(url: str, download_file_path: str, redownload: bool = False) -> bool:
    """Download ``url`` to ``download_file_path`` with a progress bar.

    The payload is streamed into ``<download_file_path>.part`` and only moved
    onto the final path once the transfer has completed, so an interrupted
    download can never be mistaken for a finished file on the next run.

    Args:
        url: Location of the file to fetch.
        download_file_path: Destination path; missing parent directories are
            created.
        redownload: When True, fetch the file even if a copy already exists.
            The existing copy is kept until the new one is complete.

    Returns:
        True if the file is present after the call (downloaded or skipped),
        False if the URL could not be opened or the transfer failed.
    """
    file_name = os.path.basename(download_file_path)

    # os.makedirs("") raises, so only create a directory when the path has one.
    base_path = os.path.dirname(download_file_path)
    if base_path:
        os.makedirs(base_path, exist_ok=True)

    if os.path.exists(download_file_path):
        if redownload:
            tqdm.write(f"♻️ Redownloading: {file_name}")
        elif os.path.getsize(download_file_path) > 0:
            tqdm.write(f"✔️ Skipped (already exists): {file_name}")
            return True
        # A zero-byte file falls through and is downloaded again.

    try:
        response = urllib.request.urlopen(url)
    except OSError as err:  # URLError and HTTPError are OSError subclasses
        print(f"❌ Error: Unable to open URL: {url}")
        print(f"Reason: {getattr(err, 'reason', err)}")
        return False

    part_path = download_file_path + ".part"
    try:
        with response:
            raw_length = response.headers.get("Content-Length")
            # total=None makes tqdm show a plain byte counter when the size is unknown.
            total = int(raw_length) if raw_length and raw_length.isdigit() else None
            written = 0
            with open(part_path, "wb") as sink, tqdm(
                total=total or None,
                desc=file_name,
                unit="B",
                unit_scale=True,
                unit_divisor=1024,
            ) as progress:
                while True:
                    chunk = response.read(_CHUNK_SIZE)
                    if not chunk:
                        break
                    sink.write(chunk)
                    written += len(chunk)
                    progress.update(len(chunk))
        if total is not None and written < total:
            raise urllib.error.URLError(
                f"retrieval incomplete: got only {written} out of {total} bytes"
            )
        # Atomic on the same filesystem: readers see the old file or the new one.
        os.replace(part_path, download_file_path)
    except (OSError, http.client.HTTPException) as err:
        print(f"❌ Error: Failed to download {url}")
        print(f"Reason: {getattr(err, 'reason', err)}")
        return False
    finally:
        # After a successful os.replace the .part file no longer exists.
        with contextlib.suppress(OSError):
            os.remove(part_path)

    tqdm.write(f"⬇️ Downloaded: {file_name}")
    return True


def _nearest_existing_dir(path: str) -> str:
    """Return ``path`` if it is a directory, else its closest existing ancestor."""
    candidate = os.path.abspath(path)
    while not os.path.isdir(candidate):
        parent = os.path.dirname(candidate)
        if parent == candidate:  # reached the filesystem root
            break
        candidate = parent
    return candidate


def main():
    """Download every MMS model file into MODELS_DIR, exiting with 1 on failure."""
    # Use MODELS_DIR from environment variable or default
    models_dir = os.environ.get("MODELS_DIR", "./models")

    print(f"📁 Checking and downloading MMS models to: {models_dir}")

    # Check write permission on the directory that will actually be written
    # to: MODELS_DIR itself when it exists, otherwise the closest ancestor in
    # which the missing directories would be created.
    if not os.access(_nearest_existing_dir(models_dir), os.W_OK):
        print(f"✗ No write permission to {models_dir}")
        sys.exit(1)

    # ✅ Define URLs and build full local paths here
    model_urls = {
        "https://dl.fbaipublicfiles.com/mms/torchaudio/ctc_alignment_mling_uroman/dictionary.txt":
            os.path.join(models_dir, "ctc_alignment_mling_uroman_model_dict.txt"),
        "https://dl.fbaipublicfiles.com/mms/torchaudio/ctc_alignment_mling_uroman/model.pt":
            os.path.join(models_dir, "ctc_alignment_mling_uroman_model.pt"),
    }

    for url, full_path in model_urls.items():
        if not download_file(url, full_path):
            print(f"✗ Failed to fetch: {os.path.basename(full_path)}")
            sys.exit(1)

    print("✅ All model files are ready!")


# Runs when executed as a script or as a notebook cell (both set __name__ to
# "__main__"), but not when the module is merely imported.
if __name__ == "__main__":
    main()