Spaces: Running on CPU Upgrade
Commit b170468 · 1 Parent(s): 6305843
Support video
Browse files
ui/src/app/api/hf-jobs/route.ts
CHANGED
|
@@ -216,7 +216,9 @@ def copy_dataset_files(source_dir: str, local_path: str):
|
|
| 216 |
print(f"Collecting data files from {source_dir}")
|
| 217 |
|
| 218 |
image_exts = {'.jpg', '.jpeg', '.png', '.webp', '.bmp'}
|
|
|
|
| 219 |
copied_images = 0
|
|
|
|
| 220 |
copied_captions = 0
|
| 221 |
|
| 222 |
for root, _, files in os.walk(source_dir):
|
|
@@ -236,6 +238,12 @@ def copy_dataset_files(source_dir: str, local_path: str):
|
|
| 236 |
copied_images += 1
|
| 237 |
except Exception as img_error:
|
| 238 |
print(f"Error copying image {src_path}: {img_error}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 239 |
elif ext == '.txt':
|
| 240 |
try:
|
| 241 |
shutil.copy2(src_path, dest_path)
|
|
@@ -248,8 +256,11 @@ def copy_dataset_files(source_dir: str, local_path: str):
|
|
| 248 |
except Exception as other_error:
|
| 249 |
print(f"Error copying file {src_path}: {other_error}")
|
| 250 |
|
| 251 |
-
|
| 252 |
-
|
|
|
|
|
|
|
|
|
|
| 253 |
|
| 254 |
|
| 255 |
def download_dataset(dataset_repo: str, local_path: str):
|
|
@@ -261,10 +272,10 @@ def download_dataset(dataset_repo: str, local_path: str):
|
|
| 261 |
local_source = find_local_dataset_source(dataset_repo)
|
| 262 |
if local_source:
|
| 263 |
print(f"Found local dataset at {local_source}")
|
| 264 |
-
|
| 265 |
-
if
|
| 266 |
return
|
| 267 |
-
print("Local dataset did not contain
|
| 268 |
|
| 269 |
repo_id = normalize_repo_id(dataset_repo)
|
| 270 |
|
|
@@ -274,10 +285,10 @@ def download_dataset(dataset_repo: str, local_path: str):
|
|
| 274 |
temp_repo_path = snapshot_download(repo_id=repo_id, repo_type="dataset")
|
| 275 |
print(f"Downloaded repo to: {temp_repo_path}")
|
| 276 |
print(f"Contents: {os.listdir(temp_repo_path)}")
|
| 277 |
-
|
| 278 |
-
if
|
| 279 |
return
|
| 280 |
-
print("Snapshot download did not contain
|
| 281 |
except Exception as snapshot_error:
|
| 282 |
print(f"Snapshot download failed: {snapshot_error}")
|
| 283 |
|
|
|
|
| 216 |
print(f"Collecting data files from {source_dir}")
|
| 217 |
|
| 218 |
image_exts = {'.jpg', '.jpeg', '.png', '.webp', '.bmp'}
|
| 219 |
+
video_exts = {'.mp4', '.avi', '.mov', '.webm', '.mkv', '.wmv', '.m4v', '.flv'}
|
| 220 |
copied_images = 0
|
| 221 |
+
copied_videos = 0
|
| 222 |
copied_captions = 0
|
| 223 |
|
| 224 |
for root, _, files in os.walk(source_dir):
|
|
|
|
| 238 |
copied_images += 1
|
| 239 |
except Exception as img_error:
|
| 240 |
print(f"Error copying image {src_path}: {img_error}")
|
| 241 |
+
elif ext in video_exts:
|
| 242 |
+
try:
|
| 243 |
+
shutil.copy2(src_path, dest_path)
|
| 244 |
+
copied_videos += 1
|
| 245 |
+
except Exception as vid_error:
|
| 246 |
+
print(f"Error copying video {src_path}: {vid_error}")
|
| 247 |
elif ext == '.txt':
|
| 248 |
try:
|
| 249 |
shutil.copy2(src_path, dest_path)
|
|
|
|
| 256 |
except Exception as other_error:
|
| 257 |
print(f"Error copying file {src_path}: {other_error}")
|
| 258 |
|
| 259 |
+
total_media = copied_images + copied_videos
|
| 260 |
+
print(
|
| 261 |
+
f"Prepared {copied_images} images, {copied_videos} videos, and {copied_captions} captions in {local_path}"
|
| 262 |
+
)
|
| 263 |
+
return total_media, copied_captions
|
| 264 |
|
| 265 |
|
| 266 |
def download_dataset(dataset_repo: str, local_path: str):
|
|
|
|
| 272 |
local_source = find_local_dataset_source(dataset_repo)
|
| 273 |
if local_source:
|
| 274 |
print(f"Found local dataset at {local_source}")
|
| 275 |
+
media_copied, _ = copy_dataset_files(local_source, local_path)
|
| 276 |
+
if media_copied > 0:
|
| 277 |
return
|
| 278 |
+
print("Local dataset did not contain media files, falling back to remote download")
|
| 279 |
|
| 280 |
repo_id = normalize_repo_id(dataset_repo)
|
| 281 |
|
|
|
|
| 285 |
temp_repo_path = snapshot_download(repo_id=repo_id, repo_type="dataset")
|
| 286 |
print(f"Downloaded repo to: {temp_repo_path}")
|
| 287 |
print(f"Contents: {os.listdir(temp_repo_path)}")
|
| 288 |
+
media_copied, _ = copy_dataset_files(temp_repo_path, local_path)
|
| 289 |
+
if media_copied > 0:
|
| 290 |
return
|
| 291 |
+
print("Snapshot download did not contain media files, attempting structured dataset load")
|
| 292 |
except Exception as snapshot_error:
|
| 293 |
print(f"Snapshot download failed: {snapshot_error}")
|
| 294 |
|