Spaces:
Runtime error
Runtime error
File size: 4,273 Bytes
f96a1e4 e578b02 f96a1e4 e578b02 f96a1e4 1dce2dd e578b02 f96a1e4 e578b02 f96a1e4 e578b02 f96a1e4 e578b02 f96a1e4 e578b02 f96a1e4 e578b02 1dce2dd e578b02 1dce2dd e578b02 1dce2dd e578b02 f96a1e4 e578b02 f96a1e4 e578b02 f96a1e4 e578b02 1dce2dd f96a1e4 e578b02 f96a1e4 1dce2dd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 |
# Help me write a Python script that downloads a file from a remote server and stores it in /tmp.
# If the file already exists in /tmp, skip the download step.
# Next, create the volume and extract the downloaded data into that volume.
# Finally, clean up /tmp.
# http://172.16.15.118:9557/repository/atalink-hf-models/backups/data_cache_backup.tar.gz
# http://172.16.15.118:9557/repository/atalink-hf-models/backups/data_gfpgan_backup.tar.gz
import os
import requests
import shutil
import subprocess
import tarfile
from tqdm import tqdm
# Directory (relative to the current working directory) where archives are
# downloaded before being extracted.
TMP_DIR = "tmp"

# Prefix prepended to the archive's base name to form the Docker volume name.
VOLUME_PREFIX = "atalink_"

# Archives to fetch; main() restores each one into its own Docker volume.
DOWNLOADS = [
    "http://172.16.15.118:9557/repository/atalink-hf-models/backups/data_cache_backup.tar.gz",
    "http://172.16.15.118:9557/repository/atalink-hf-models/backups/data_gfpgan_backup.tar.gz",
    # "https://huggingface.co/hynt/F5-TTS-Vietnamese-ViVoice/resolve/main/config.json"
]
def download_file(url, dest_folder, timeout=(10, 60)):
    """Download *url* into *dest_folder* unless the file is already there.

    The file name is taken from the path component of the URL (query string
    and fragment are ignored). Data is streamed to ``<name>.part`` and only
    renamed to its final name once the transfer has completed, so an
    interrupted download is never mistaken for a finished one on the next run.

    Args:
        url: HTTP(S) URL of the file to fetch.
        dest_folder: Directory to store the file in; created if missing.
        timeout: ``(connect, read)`` timeout in seconds passed to
            ``requests.get``.

    Returns:
        The path of the downloaded (or already existing) file.

    Raises:
        ValueError: If no file name can be derived from *url*.
        requests.RequestException: On connection errors or a non-2xx status.
        OSError: If the destination cannot be created or written.
    """
    from urllib.parse import urlsplit

    filename = os.path.basename(urlsplit(url).path)
    if not filename:
        raise ValueError(f"Cannot derive a file name from URL: {url!r}")

    dest_path = os.path.join(dest_folder, filename)
    if os.path.exists(dest_path):
        print(f"✅ [SKIP] {filename} already exists at \033[96m{dest_path}\033[0m, skipping download.")
        return dest_path

    if dest_folder:
        os.makedirs(dest_folder, exist_ok=True)

    # Stream into a sibling temp file; it becomes dest_path only on success.
    part_path = dest_path + ".part"
    print(f"\n📥 [START] Downloading: \033[1;36m{filename}\033[0m → \033[96m{dest_path}\033[0m")
    try:
        with requests.get(url, stream=True, timeout=timeout) as r:
            r.raise_for_status()
            # A missing/zero content-length means "unknown" for the progress bar.
            total = int(r.headers.get("content-length", 0)) or None
            chunk_size = 1024 * 1024  # 1MB
            with open(part_path, "wb") as f, tqdm(
                total=total, unit="B", unit_scale=True, desc=filename
            ) as pbar:
                for chunk in r.iter_content(chunk_size=chunk_size):
                    if chunk:
                        f.write(chunk)
                        pbar.update(len(chunk))
        os.replace(part_path, dest_path)
    except BaseException:
        # Also covers Ctrl-C: never leave a truncated file behind.
        try:
            os.remove(part_path)
        except OSError:
            pass
        raise

    print(f"✅ [DONE] Downloaded: \033[1;32m{filename}\033[0m → \033[96m{dest_path}\033[0m\n")
    return dest_path
def create_volume_and_extract(tar_path, volume_name):
    """Create a Docker volume and unpack a ``.tar.gz`` archive into it.

    The archive is extracted by a throw-away ``busybox`` container that mounts
    the volume at ``/data`` and the archive's host directory at ``/tmpdata``.
    The leading path component of every archive member is stripped.

    Args:
        tar_path: Path to the ``.tar.gz`` archive on the host.
        volume_name: Name of the Docker volume to create and fill.

    Raises:
        subprocess.CalledProcessError: If a ``docker`` command exits non-zero.
    """
    archive_name = os.path.basename(tar_path)
    host_dir = os.path.abspath(os.path.dirname(tar_path))

    # Create Docker volume
    subprocess.run(["docker", "volume", "create", volume_name], check=True)

    # Extract tar.gz into the volume using a temporary container
    print(f"\n📦 [EXTRACT] Extracting \033[1;36m{archive_name}\033[0m into Docker volume \033[1;33m{volume_name}\033[0m ...")
    subprocess.run(
        [
            "docker",
            "run",
            "--rm",  # temporary container, removed when it exits
            "-v",
            f"{volume_name}:/data",  # target Docker volume mounted at /data
            "-v",
            f"{host_dir}:/tmpdata",  # host directory holding the archive
            "busybox",  # image used to run tar
            # tar is invoked directly (no `sh -c`), so the archive name is
            # passed as a single argument and never parsed by a shell.
            "tar",
            "-xzvf",
            f"/tmpdata/{archive_name}",
            "--strip",
            "1",
            "-C",
            "/data",
        ],
        check=True,
    )
    print(f"✅ [DONE] Extracted \033[1;36m{archive_name}\033[0m into volume \033[1;33m{volume_name}\033[0m\n")
def cleanup_tmp(files):
    """Delete every path in *files*, reporting each outcome on stdout.

    Removal is best-effort: a failure for one path is printed and the
    remaining paths are still processed.
    """
    for path in files:
        try:
            os.remove(path)
            print(f"🧹 [CLEANUP] Removed \033[96m{path}\033[0m")
        except Exception as err:
            print(f"⚠️ [CLEANUP] Could not remove \033[96m{path}\033[0m: {err}")
def main():
    """Download each archive, restore it into its own volume, then clean up.

    For every URL in DOWNLOADS the archive is fetched into TMP_DIR and
    extracted into a Docker volume named VOLUME_PREFIX + the archive's base
    name with two extensions removed. The downloaded archives are deleted
    once all of them have been processed.
    """
    fetched = []
    for url in DOWNLOADS:
        archive = download_file(url, TMP_DIR)
        fetched.append(archive)

        # Drop two extensions so "name.tar.gz" becomes "name".
        stem, _ = os.path.splitext(os.path.basename(archive))
        stem, _ = os.path.splitext(stem)
        volume_name = VOLUME_PREFIX + stem

        print(f"🚀 [VOLUME] Name: \033[1;33m{volume_name}\033[0m")
        create_volume_and_extract(archive, volume_name)

    cleanup_tmp(fetched)


if __name__ == "__main__":
    main()
# if __name__ == "__main__":
# Test create_volume_and_extract with the file test_data_backup.tar.gz
# test_tar = os.path.join("tmp", "data_backup.tar.gz")
# if os.path.exists(test_tar):
# create_volume_and_extract(test_tar, "atalink_data_backup")
# else:
# print("File tmp/test_data_backup.tar.gz không tồn tại để test.")
|