# TTS-Talker / utils / prepare_environment.py
# Quang Long
# update docker-compose, dockerfile, setup cronjob, fix generate audio
# 1dce2dd
# Download each backup archive from the remote server and store it in the temporary directory.
# If an archive already exists there, skip the download step.
# Next, create a Docker volume and extract the downloaded data into that volume.
# Finally, clean up the downloaded archives.
# http://172.16.15.118:9557/repository/atalink-hf-models/backups/data_cache_backup.tar.gz
# http://172.16.15.118:9557/repository/atalink-hf-models/backups/data_gfpgan_backup.tar.gz
import os
import requests
import shutil
import subprocess
import tarfile
from tqdm import tqdm
# Backup archives to fetch; main() restores each one into its own Docker volume.
# Every entry ends with a comma so that re-enabling a commented-out entry cannot
# trigger implicit string concatenation with the line above it.
DOWNLOADS = [
    "http://172.16.15.118:9557/repository/atalink-hf-models/backups/data_cache_backup.tar.gz",
    "http://172.16.15.118:9557/repository/atalink-hf-models/backups/data_gfpgan_backup.tar.gz",
    # "https://huggingface.co/hynt/F5-TTS-Vietnamese-ViVoice/resolve/main/config.json",
]
# Download directory, relative to the current working directory.
TMP_DIR = "tmp"
# Prepended to an archive's base name to form its Docker volume name.
VOLUME_PREFIX = "atalink_"
def download_file(url, dest_folder, timeout=(10, 300)):
    """Download ``url`` into ``dest_folder`` unless the file is already present.

    The file name is the last path component of ``url``. The body is streamed
    to ``<name>.part`` and renamed to its final name only after the transfer
    finishes, so an interrupted download is never mistaken for a complete file
    by the "already exists" check on a later run.

    Args:
        url: HTTP(S) URL of the file to fetch.
        dest_folder: Directory to store the file in; created if missing.
        timeout: Passed to ``requests.get`` as its ``timeout`` argument
            (``(connect, read)`` seconds) so a stalled server cannot hang the
            script indefinitely.

    Returns:
        Path of the downloaded (or already existing) file.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        OSError: If the destination cannot be created or written.
    """
    filename = os.path.basename(url)
    dest_path = os.path.join(dest_folder, filename)
    if os.path.exists(dest_path):
        print(f"✅ [SKIP] {filename} already exists at \033[96m{dest_path}\033[0m, skipping download.")
        return dest_path

    # open() does not create missing parent directories. An empty folder name
    # means "current directory", which makedirs would reject.
    if dest_folder:
        os.makedirs(dest_folder, exist_ok=True)

    part_path = dest_path + ".part"
    print(f"\n📥 [START] Downloading: \033[1;36m{filename}\033[0m → \033[96m{dest_path}\033[0m")
    try:
        with requests.get(url, stream=True, timeout=timeout) as r:
            r.raise_for_status()
            total = int(r.headers.get("content-length", 0))
            chunk_size = 1024 * 1024  # 1MB
            with open(part_path, "wb") as f, tqdm(
                total=total, unit="B", unit_scale=True, desc=filename
            ) as pbar:
                for chunk in r.iter_content(chunk_size=chunk_size):
                    if chunk:
                        f.write(chunk)
                        pbar.update(len(chunk))
    except BaseException:
        # Covers Ctrl-C as well: drop the partial file, then re-raise. A failed
        # removal is ignored so it cannot mask the original error.
        try:
            os.remove(part_path)
        except OSError:
            pass
        raise

    # Publish the finished file under its final name in a single step.
    os.replace(part_path, dest_path)
    print(f"✅ [DONE] Downloaded: \033[1;32m{filename}\033[0m → \033[96m{dest_path}\033[0m\n")
    return dest_path
def create_volume_and_extract(tar_path, volume_name):
    """Create the Docker volume ``volume_name`` and unpack ``tar_path`` into it.

    The archive is extracted by a throwaway ``busybox`` container that mounts
    the volume at ``/data`` and the archive's host directory at ``/tmpdata``.

    Args:
        tar_path: Path on the host to the ``.tar.gz`` archive.
        volume_name: Name of the Docker volume to create and fill.

    Raises:
        subprocess.CalledProcessError: If either ``docker`` command exits with
            a non-zero status.
    """
    archive_name = os.path.basename(tar_path)
    archive_dir = os.path.abspath(os.path.dirname(tar_path))

    # Create Docker volume
    subprocess.run(["docker", "volume", "create", volume_name], check=True)

    # Extract tar.gz into the volume using a temporary container
    print(f"\n📦 [EXTRACT] Extracting \033[1;36m{archive_name}\033[0m into Docker volume \033[1;33m{volume_name}\033[0m ...")
    subprocess.run(
        [
            "docker",
            "run",
            "--rm",  # temporary container, removed automatically when it exits
            "-v",
            f"{volume_name}:/data",  # mount the Docker volume at /data
            "-v",
            f"{archive_dir}:/tmpdata",  # mount the archive's host directory at /tmpdata
            "busybox",  # image used to run the container
            # tar is invoked directly rather than through `sh -c`, so an archive
            # name with spaces or shell metacharacters stays a single argument.
            "tar",
            "-xzvf",
            f"/tmpdata/{archive_name}",
            # NOTE(review): presumably the abbreviated --strip-components,
            # dropping the archive's top-level directory; confirm against the
            # busybox tar in use.
            "--strip",
            "1",
            "-C",
            "/data",
        ],
        check=True,
    )
    print(f"✅ [DONE] Extracted \033[1;36m{archive_name}\033[0m into volume \033[1;33m{volume_name}\033[0m\n")
def cleanup_tmp(files):
    """Delete every path in ``files``, printing one status line per path.

    Removal is best-effort: an exception raised while handling one path is
    reported as a warning and the remaining paths are still processed.
    """
    for target in files:
        try:
            os.remove(target)
            print(f"🧹 [CLEANUP] Removed \033[96m{target}\033[0m")
        except Exception as err:
            print(f"⚠️ [CLEANUP] Could not remove \033[96m{target}\033[0m: {err}")
def main():
    """Download every archive in DOWNLOADS and restore each into its own volume.

    The volume name is VOLUME_PREFIX followed by the archive's file name with
    its last two extensions removed. Once all archives have been processed,
    the downloaded files are deleted.
    """
    fetched = []
    for url in DOWNLOADS:
        archive = download_file(url, TMP_DIR)
        fetched.append(archive)

        # Two splitext passes peel off ".gz" and then ".tar".
        stem = os.path.basename(archive)
        for _ in range(2):
            stem = os.path.splitext(stem)[0]
        volume_name = VOLUME_PREFIX + stem

        print(f"🚀 [VOLUME] Name: \033[1;33m{volume_name}\033[0m")
        create_volume_and_extract(archive, volume_name)

    cleanup_tmp(fetched)


if __name__ == "__main__":
    main()
# if __name__ == "__main__":
#     # Test create_volume_and_extract with the file tmp/data_backup.tar.gz
#     test_tar = os.path.join("tmp", "data_backup.tar.gz")
#     if os.path.exists(test_tar):
#         create_volume_and_extract(test_tar, "atalink_data_backup")
#     else:
#         print("File tmp/data_backup.tar.gz does not exist, so there is nothing to test.")