Omnilingual-ASR-Colab / server /download_models.py
NeuralFalcon's picture
Upload 21 files
92e075b verified
#@title download model /content/omniasr-transcriptions/server/download_models.py
# %%writefile /content/omniasr-transcriptions/server/download_models.py
#!/usr/bin/env python3
"""
download_models.py
Ensures the MMS model files are downloaded into MODELS_DIR.
"""
import os
import urllib.request
import urllib.error
from tqdm.auto import tqdm
import sys
def download_file(url: str, download_file_path: str, redownload: bool = False) -> bool:
    """Download ``url`` to ``download_file_path`` with a tqdm progress bar.

    The payload is streamed into a sibling ``<name>.part`` file and only moved
    onto ``download_file_path`` once the transfer is complete, so an
    interrupted run never leaves a truncated file that a later call would
    mistake for a finished download.

    Args:
        url: Source URL; anything ``urllib.request.urlopen`` accepts.
        download_file_path: Destination path. Missing parent directories are
            created.
        redownload: When true, an existing destination file is deleted and
            fetched again. When false, an existing non-empty file is kept and
            the download is skipped.

    Returns:
        True if the file is in place after the call (downloaded or skipped),
        False if the URL could not be opened or the transfer failed.
    """
    chunk_size = 64 * 1024
    file_name = os.path.basename(download_file_path)
    target_dir = os.path.dirname(download_file_path)
    # A bare file name has an empty dirname, and os.makedirs("") raises.
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    # Skip if file already exists
    if os.path.exists(download_file_path):
        if redownload:
            os.remove(download_file_path)
            tqdm.write(f"♻️ Redownloading: {file_name}")
        elif os.path.getsize(download_file_path) > 0:
            tqdm.write(f"βœ”οΈ Skipped (already exists): {file_name}")
            return True
        # An existing zero-byte file falls through and is downloaded again.

    # One request serves both the metadata (Content-Length) and the payload.
    # URLError is a subclass of OSError, so this also covers raw socket errors.
    try:
        response = urllib.request.urlopen(url)
    except OSError as e:
        print(f"❌ Error: Unable to open URL: {url}")
        print(f"Reason: {getattr(e, 'reason', e)}")
        return False

    part_path = f"{download_file_path}.part"
    try:
        with response:
            expected = int(response.headers.get("Content-Length", 0))
            received = 0
            with open(part_path, "wb") as part_file, tqdm(
                total=expected or None,  # unknown size -> open-ended bar
                desc=file_name,
                unit="B",
                unit_scale=True,
                unit_divisor=1024,
            ) as progress:
                for chunk in iter(lambda: response.read(chunk_size), b""):
                    part_file.write(chunk)
                    received += len(chunk)
                    # Advance by the bytes actually read, not a nominal block size.
                    progress.update(len(chunk))
        if received < expected:
            # The connection closed early; treat it like urlretrieve does.
            raise urllib.error.ContentTooShortError(
                f"retrieval incomplete: got only {received} out of {expected} bytes",
                None,
            )
        # Publish the file only after a complete transfer.
        os.replace(part_path, download_file_path)
    except OSError as e:
        # Drop the partial payload so nothing stale is left behind.
        try:
            os.remove(part_path)
        except FileNotFoundError:
            pass  # the failure happened before the temporary file was created
        print(f"❌ Error: Failed to download {url}")
        print(f"Reason: {getattr(e, 'reason', e)}")
        return False

    tqdm.write(f"⬇️ Downloaded: {file_name}")
    return True
def main():
    """Fetch the MMS CTC-alignment model files into the models directory.

    The target directory comes from the ``MODELS_DIR`` environment variable
    and defaults to ``./models``. The process exits with status 1 when the
    target location is not writable or when any file fails to download.
    """
    # Use MODELS_DIR from environment variable or default
    models_dir = os.environ.get("MODELS_DIR", "./models")
    print(f"πŸ“ Checking and downloading MMS models to: {models_dir}")

    # Check write permission on the closest directory that already exists:
    # missing levels are created during the download, so probing only the
    # immediate parent would reject a target whose parent is not there yet
    # and would ignore the permissions of an existing models directory.
    probe = os.path.abspath(models_dir)
    while not os.path.exists(probe):
        parent = os.path.dirname(probe)
        if parent == probe:  # reached the filesystem root
            break
        probe = parent
    if not (os.path.isdir(probe) and os.access(probe, os.W_OK)):
        print(f"βœ— No write permission to {models_dir}")
        sys.exit(1)

    # Map each source URL to its full local destination path
    model_urls = {
        "https://dl.fbaipublicfiles.com/mms/torchaudio/ctc_alignment_mling_uroman/dictionary.txt":
            os.path.join(models_dir, "ctc_alignment_mling_uroman_model_dict.txt"),
        "https://dl.fbaipublicfiles.com/mms/torchaudio/ctc_alignment_mling_uroman/model.pt":
            os.path.join(models_dir, "ctc_alignment_mling_uroman_model.pt"),
    }

    for url, full_path in model_urls.items():
        # Stop at the first failure; the remaining files are not attempted.
        if not download_file(url, full_path):
            print(f"βœ— Failed to fetch: {os.path.basename(full_path)}")
            sys.exit(1)

    print("βœ… All model files are ready!")
# Entry point: main() is called unconditionally, so it also runs when this
# module is imported, not only when it is executed as a script.
# NOTE(review): the conventional guard below is commented out — confirm that
# running on import is intended (e.g. when the file is run from a notebook cell).
main()
# if __name__ == "__main__":
# main()