File size: 4,273 Bytes
f96a1e4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e578b02
f96a1e4
 
e578b02
 
 
f96a1e4
 
 
1dce2dd
 
e578b02
f96a1e4
 
 
 
 
e578b02
f96a1e4
e578b02
f96a1e4
 
e578b02
 
 
 
 
 
 
 
 
 
f96a1e4
 
e578b02
f96a1e4
 
 
 
e578b02
 
 
 
 
 
 
 
 
1dce2dd
e578b02
1dce2dd
e578b02
1dce2dd
e578b02
 
 
 
 
f96a1e4
 
 
 
 
e578b02
f96a1e4
e578b02
 
f96a1e4
 
 
 
 
 
e578b02
 
 
 
1dce2dd
 
 
f96a1e4
e578b02
f96a1e4
 
1dce2dd
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# Download backup archives from a remote server and store them in the tmp directory.

# If an archive already exists in the tmp directory, skip the download step.

# Next, create a Docker volume for each archive and extract the downloaded data into that volume.

# Finally, clean up the downloaded archives from the tmp directory.

# http://172.16.15.118:9557/repository/atalink-hf-models/backups/data_cache_backup.tar.gz
# http://172.16.15.118:9557/repository/atalink-hf-models/backups/data_gfpgan_backup.tar.gz

import os
import requests
import shutil
import subprocess
import tarfile
from tqdm import tqdm

# Directory (relative to the current working directory) that receives the
# downloaded archives.
TMP_DIR = "tmp"

# Prefix put in front of an archive's base name to form its Docker volume name.
VOLUME_PREFIX = "atalink_"

# Archives to fetch; each one is unpacked into its own Docker volume.
DOWNLOADS = [
    "http://172.16.15.118:9557/repository/atalink-hf-models/backups/data_cache_backup.tar.gz",
    "http://172.16.15.118:9557/repository/atalink-hf-models/backups/data_gfpgan_backup.tar.gz",
    # "https://huggingface.co/hynt/F5-TTS-Vietnamese-ViVoice/resolve/main/config.json"
]


def download_file(url, dest_folder, *, timeout=(10, 60)):
    """Download ``url`` into ``dest_folder`` unless the file is already there.

    The local file name is the last path component of the URL. Any query
    string or fragment is ignored, so ``.../config.json?download=true`` is
    stored as ``config.json``.

    The payload is streamed to ``<name>.part`` and renamed to its final name
    only after the transfer completes, so an interrupted download is never
    mistaken for a finished one on the next run.

    Args:
        url: HTTP(S) URL of the file to fetch.
        dest_folder: Directory to store the file in; created if missing.
        timeout: ``(connect, read)`` timeout in seconds passed to
            ``requests.get`` so a stalled server cannot hang the script.

    Returns:
        Path of the local file (``dest_folder`` joined with the file name).

    Raises:
        ValueError: If no file name can be derived from ``url``.
        requests.HTTPError: If the server answers with an error status.
        OSError: If the destination cannot be created or written.
    """
    from urllib.parse import urlsplit

    filename = os.path.basename(urlsplit(url).path)
    if not filename:
        raise ValueError(f"Cannot derive a file name from URL: {url!r}")
    dest_path = os.path.join(dest_folder, filename)
    if os.path.exists(dest_path):
        print(f"✅ [SKIP] {filename} already exists at \033[96m{dest_path}\033[0m, skipping download.")
        return dest_path

    if dest_folder:
        os.makedirs(dest_folder, exist_ok=True)
    part_path = dest_path + ".part"
    print(f"\n📥 [START] Downloading: \033[1;36m{filename}\033[0m → \033[96m{dest_path}\033[0m")
    try:
        with requests.get(url, stream=True, timeout=timeout) as r:
            r.raise_for_status()
            # A missing/zero content-length means "size unknown" to tqdm.
            total = int(r.headers.get("content-length", 0)) or None
            chunk_size = 1024 * 1024  # 1MB
            with open(part_path, "wb") as f, tqdm(
                total=total, unit="B", unit_scale=True, desc=filename
            ) as pbar:
                for chunk in r.iter_content(chunk_size=chunk_size):
                    if chunk:
                        f.write(chunk)
                        pbar.update(len(chunk))
        # Publish the file under its final name only once it is complete.
        os.replace(part_path, dest_path)
    except BaseException:
        # Leave no half-written file behind (also on Ctrl-C), then re-raise.
        if os.path.exists(part_path):
            os.remove(part_path)
        raise
    print(f"✅ [DONE] Downloaded: \033[1;32m{filename}\033[0m → \033[96m{dest_path}\033[0m\n")
    return dest_path


def create_volume_and_extract(tar_path, volume_name):
    """Create a Docker volume and unpack a ``.tar.gz`` archive into it.

    The volume is created with ``docker volume create``. A throw-away
    ``busybox`` container then mounts the volume at ``/data`` and the
    archive's host directory at ``/tmpdata``, and runs ``tar`` to extract
    the archive into ``/data``.

    ``tar`` is executed directly (no ``sh -c``), so an archive name with
    spaces or shell metacharacters is passed as a single literal argument
    instead of being re-parsed by a shell.

    Args:
        tar_path: Path to the archive on the host.
        volume_name: Name of the Docker volume to create and fill.

    Raises:
        subprocess.CalledProcessError: If a ``docker`` command exits non-zero.
        FileNotFoundError: If the ``docker`` executable is not found.
    """
    archive_name = os.path.basename(tar_path)
    archive_dir = os.path.abspath(os.path.dirname(tar_path))

    # Create Docker volume
    subprocess.run(["docker", "volume", "create", volume_name], check=True)
    # Extract tar.gz into the volume using a temporary container
    print(f"\n📦 [EXTRACT] Extracting \033[1;36m{os.path.basename(tar_path)}\033[0m into Docker volume \033[1;33m{volume_name}\033[0m ...")
    subprocess.run(
        [
            "docker",
            "run",
            "--rm",  # Temporary container, removed automatically when it exits
            "-v",
            f"{volume_name}:/data",  # Mount the Docker volume at /data in the container
            "-v",
            f"{archive_dir}:/tmpdata",  # Mount the archive's host directory at /tmpdata
            "busybox",  # Image used to run the container
            # Same tar arguments the original shell command produced, given
            # as separate argv entries so no shell parsing is involved.
            "tar",
            "-xzvf",
            f"/tmpdata/{archive_name}",
            "--strip",
            "1",
            "-C",
            "/data",
        ],
        check=True,
    )
    print(f"✅ [DONE] Extracted \033[1;36m{os.path.basename(tar_path)}\033[0m into volume \033[1;33m{volume_name}\033[0m\n")


def cleanup_tmp(files):
    """Best-effort removal of every path in ``files``.

    Each outcome is reported on stdout. A failure to delete one path does
    not stop the remaining paths from being processed.
    """
    for path in files:
        try:
            os.remove(path)
        except Exception as err:
            print(f"⚠️ [CLEANUP] Could not remove \033[96m{path}\033[0m: {err}")
        else:
            print(f"🧹 [CLEANUP] Removed \033[96m{path}\033[0m")


def main():
    """Fetch every archive in DOWNLOADS, unpack each into its own volume, then clean up.

    The volume name is VOLUME_PREFIX followed by the archive's file name
    with up to two extensions removed (e.g. ``x.tar.gz`` -> ``x``). The
    downloaded archives are removed only after all of them have been
    processed.
    """
    fetched = []
    for source_url in DOWNLOADS:
        archive = download_file(source_url, TMP_DIR)
        fetched.append(archive)

        # Strip two extensions so "name.tar.gz" becomes "name".
        stem, _ = os.path.splitext(os.path.basename(archive))
        stem, _ = os.path.splitext(stem)
        volume_name = f"{VOLUME_PREFIX}{stem}"

        print(f"🚀 [VOLUME] Name: \033[1;33m{volume_name}\033[0m")
        create_volume_and_extract(archive, volume_name)

    cleanup_tmp(fetched)


if __name__ == "__main__":
    main()


# Manual test for create_volume_and_extract using a local archive:
# if __name__ == "__main__":
    # test_tar = os.path.join("tmp", "data_backup.tar.gz")
    # if os.path.exists(test_tar):
    #     create_volume_and_extract(test_tar, "atalink_data_backup")
    # else:
    #     print(f"{test_tar} does not exist; nothing to test.")