Long Trinh-Quang committed on
Commit
f2e1f48
·
1 Parent(s): 52ff743

update prepare_environment

Browse files
Files changed (1) hide show
  1. utils/prepare_environment.py +51 -20
utils/prepare_environment.py CHANGED
@@ -16,8 +16,14 @@ from tqdm import tqdm
16
  import tempfile
17
 
18
  DOWNLOADS = [
19
- "http://172.16.15.118:9557/repository/atalink-hf-models/backups/data_cache_backup.tar.gz",
20
- "http://172.16.15.118:9557/repository/atalink-hf-models/backups/data_gfpgan_backup.tar.gz",
 
 
 
 
 
 
21
  # "https://huggingface.co/hynt/F5-TTS-Vietnamese-ViVoice/resolve/main/config.json"
22
  ]
23
 
@@ -34,7 +40,7 @@ def download_file(url, dest_folder):
34
  print(
35
  f"✅ [SKIP] {filename} already exists at \033[96m{dest_path}\033[0m, skipping download."
36
  )
37
- return dest_path
38
  print(
39
  f"\n📥 [START] Downloading: \033[1;36m{filename}\033[0m → \033[96m{dest_path}\033[0m"
40
  )
@@ -52,12 +58,34 @@ def download_file(url, dest_folder):
52
  print(
53
  f"✅ [DONE] Downloaded: \033[1;32m{filename}\033[0m → \033[96m{dest_path}\033[0m\n"
54
  )
55
- return dest_path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
 
57
 
58
- def create_volume_and_extract(tar_path, volume_name):
59
- # Create Docker volume
60
- subprocess.run(["docker", "volume", "create", volume_name], check=True)
 
 
 
 
61
 
62
  # Convert host path để Docker hiểu cả Windows & Linux
63
  host_path = os.path.abspath(os.path.dirname(tar_path))
@@ -76,10 +104,10 @@ def create_volume_and_extract(tar_path, volume_name):
76
  f"{volume_name}:/data", # Mount Docker volume (volume_name) vào thư mục /data trong container
77
  "-v",
78
  f"{host_path}:/tmpdata", # Mount thư mục chứa file tar.gz trên host vào /tmpdata trong container
79
- "busybox", # Image dùng để chạy container (ở đây là Ubuntu 22.04)
80
  "sh",
81
- "-c", # Chạy lệnh bash trong container
82
- f"tar -xzvf /tmpdata/{os.path.basename(tar_path)} --strip 1 -C /data", # Lệnh giải nén file tar.gz từ /tmpdata vào /data
83
  ],
84
  check=True,
85
  )
@@ -99,16 +127,19 @@ def cleanup_tmp(files):
99
 
100
  def main():
101
  downloaded_files = []
102
- for url in DOWNLOADS:
103
- tar_path = download_file(url, TMP_DIR)
104
  downloaded_files.append(tar_path)
105
  volume_name = (
106
  VOLUME_PREFIX
107
  + os.path.splitext(os.path.splitext(os.path.basename(tar_path))[0])[0]
108
  )
109
  print(f"🚀 [VOLUME] Name: \033[1;33m{volume_name}\033[0m")
110
- create_volume_and_extract(tar_path, volume_name)
111
- cleanup_tmp(downloaded_files)
 
 
 
112
 
113
 
114
  if __name__ == "__main__":
@@ -116,9 +147,9 @@ if __name__ == "__main__":
116
 
117
 
118
  # if __name__ == "__main__":
119
- # Test create_volume_and_extract với file test_data_backup.tar.gz
120
- # test_tar = os.path.join(TMP_DIR, "data_backup.tar.gz")
121
- # if os.path.exists(test_tar):
122
- # create_volume_and_extract(test_tar, "atalink_data_backup")
123
- # else:
124
- # print("File tmp/test_data_backup.tar.gz không tồn tại để test.")
 
16
  import tempfile
17
 
18
  DOWNLOADS = [
19
+ (
20
+ "http://172.16.15.118:9557/repository/atalink-hf-models/backups/data_cache_backup.tar.gz",
21
+ 3,
22
+ ),
23
+ (
24
+ "http://172.16.15.118:9557/repository/atalink-hf-models/backups/data_gfpgan_backup.tar.gz",
25
+ 4,
26
+ ),
27
  # "https://huggingface.co/hynt/F5-TTS-Vietnamese-ViVoice/resolve/main/config.json"
28
  ]
29
 
 
40
  print(
41
  f"✅ [SKIP] {filename} already exists at \033[96m{dest_path}\033[0m, skipping download."
42
  )
43
+ return dest_path, False # not downloaded this run
44
  print(
45
  f"\n📥 [START] Downloading: \033[1;36m{filename}\033[0m → \033[96m{dest_path}\033[0m"
46
  )
 
58
  print(
59
  f"✅ [DONE] Downloaded: \033[1;32m{filename}\033[0m → \033[96m{dest_path}\033[0m\n"
60
  )
61
+ return dest_path, True # downloaded in this run
62
+
63
+
64
+ def volume_exists(volume_name: str) -> bool:
65
+ """Return True if a Docker volume exists."""
66
+ try:
67
+ subprocess.run(
68
+ [
69
+ "docker",
70
+ "volume",
71
+ "inspect",
72
+ volume_name,
73
+ ],
74
+ check=True,
75
+ capture_output=True,
76
+ )
77
+ return True
78
+ except subprocess.CalledProcessError:
79
+ return False
80
 
81
 
82
+ def create_volume_and_extract(tar_path, volume_name, extract_path=1):
83
+ # Create Docker volume only if it doesn't exist
84
+ if volume_exists(volume_name):
85
+ print(f"✅ [VOLUME] Exists: \033[1;33m{volume_name}\033[0m — skipping create")
86
+ else:
87
+ subprocess.run(["docker", "volume", "create", volume_name], check=True)
88
+ print(f"🚀 [VOLUME] Created: \033[1;33m{volume_name}\033[0m")
89
 
90
  # Convert host path để Docker hiểu cả Windows & Linux
91
  host_path = os.path.abspath(os.path.dirname(tar_path))
 
104
  f"{volume_name}:/data", # Mount Docker volume (volume_name) vào thư mục /data trong container
105
  "-v",
106
  f"{host_path}:/tmpdata", # Mount thư mục chứa file tar.gz trên host vào /tmpdata trong container
107
+ "busybox", # Sử dụng busybox, tar hỗ trợ --strip-components
108
  "sh",
109
+ "-c", # Chạy lệnh shell trong container
110
+ f"tar -xzvf /tmpdata/{os.path.basename(tar_path)} --strip {extract_path} -C /data", # Lệnh giải nén file tar.gz từ /tmpdata vào /data
111
  ],
112
  check=True,
113
  )
 
127
 
128
  def main():
129
  downloaded_files = []
130
+ for url, extract_path in DOWNLOADS:
131
+ tar_path, downloaded = download_file(url, TMP_DIR)
132
  downloaded_files.append(tar_path)
133
  volume_name = (
134
  VOLUME_PREFIX
135
  + os.path.splitext(os.path.splitext(os.path.basename(tar_path))[0])[0]
136
  )
137
  print(f"🚀 [VOLUME] Name: \033[1;33m{volume_name}\033[0m")
138
+ create_volume_and_extract(tar_path, volume_name, extract_path)
139
+ if downloaded_files:
140
+ cleanup_tmp(downloaded_files)
141
+ else:
142
+ print("🧹 [CLEANUP] No new files downloaded — nothing to remove.")
143
 
144
 
145
  if __name__ == "__main__":
 
147
 
148
 
149
  # if __name__ == "__main__":
150
+ # Test create_volume_and_extract với file test_data_backup.tar.gz
151
+ # test_tar = os.path.join(TMP_DIR, "data_backup.tar.gz")
152
+ # if os.path.exists(test_tar):
153
+ # create_volume_and_extract(test_tar, "atalink_data_backup")
154
+ # else:
155
+ # print("File tmp/test_data_backup.tar.gz không tồn tại để test.")