apolinario committed on
Commit
b170468
·
1 Parent(s): 6305843

Support video

Browse files
Files changed (1) hide show
  1. ui/src/app/api/hf-jobs/route.ts +19 -8
ui/src/app/api/hf-jobs/route.ts CHANGED
@@ -216,7 +216,9 @@ def copy_dataset_files(source_dir: str, local_path: str):
216
  print(f"Collecting data files from {source_dir}")
217
 
218
  image_exts = {'.jpg', '.jpeg', '.png', '.webp', '.bmp'}
 
219
  copied_images = 0
 
220
  copied_captions = 0
221
 
222
  for root, _, files in os.walk(source_dir):
@@ -236,6 +238,12 @@ def copy_dataset_files(source_dir: str, local_path: str):
236
  copied_images += 1
237
  except Exception as img_error:
238
  print(f"Error copying image {src_path}: {img_error}")
 
 
 
 
 
 
239
  elif ext == '.txt':
240
  try:
241
  shutil.copy2(src_path, dest_path)
@@ -248,8 +256,11 @@ def copy_dataset_files(source_dir: str, local_path: str):
248
  except Exception as other_error:
249
  print(f"Error copying file {src_path}: {other_error}")
250
 
251
- print(f"Prepared {copied_images} images and {copied_captions} captions in {local_path}")
252
- return copied_images, copied_captions
 
 
 
253
 
254
 
255
  def download_dataset(dataset_repo: str, local_path: str):
@@ -261,10 +272,10 @@ def download_dataset(dataset_repo: str, local_path: str):
261
  local_source = find_local_dataset_source(dataset_repo)
262
  if local_source:
263
  print(f"Found local dataset at {local_source}")
264
- images_copied, _ = copy_dataset_files(local_source, local_path)
265
- if images_copied > 0:
266
  return
267
- print("Local dataset did not contain images, falling back to remote download")
268
 
269
  repo_id = normalize_repo_id(dataset_repo)
270
 
@@ -274,10 +285,10 @@ def download_dataset(dataset_repo: str, local_path: str):
274
  temp_repo_path = snapshot_download(repo_id=repo_id, repo_type="dataset")
275
  print(f"Downloaded repo to: {temp_repo_path}")
276
  print(f"Contents: {os.listdir(temp_repo_path)}")
277
- images_copied, _ = copy_dataset_files(temp_repo_path, local_path)
278
- if images_copied > 0:
279
  return
280
- print("Snapshot download did not contain images, attempting structured dataset load")
281
  except Exception as snapshot_error:
282
  print(f"Snapshot download failed: {snapshot_error}")
283
 
 
216
  print(f"Collecting data files from {source_dir}")
217
 
218
  image_exts = {'.jpg', '.jpeg', '.png', '.webp', '.bmp'}
219
+ video_exts = {'.mp4', '.avi', '.mov', '.webm', '.mkv', '.wmv', '.m4v', '.flv'}
220
  copied_images = 0
221
+ copied_videos = 0
222
  copied_captions = 0
223
 
224
  for root, _, files in os.walk(source_dir):
 
238
  copied_images += 1
239
  except Exception as img_error:
240
  print(f"Error copying image {src_path}: {img_error}")
241
+ elif ext in video_exts:
242
+ try:
243
+ shutil.copy2(src_path, dest_path)
244
+ copied_videos += 1
245
+ except Exception as vid_error:
246
+ print(f"Error copying video {src_path}: {vid_error}")
247
  elif ext == '.txt':
248
  try:
249
  shutil.copy2(src_path, dest_path)
 
256
  except Exception as other_error:
257
  print(f"Error copying file {src_path}: {other_error}")
258
 
259
+ total_media = copied_images + copied_videos
260
+ print(
261
+ f"Prepared {copied_images} images, {copied_videos} videos, and {copied_captions} captions in {local_path}"
262
+ )
263
+ return total_media, copied_captions
264
 
265
 
266
  def download_dataset(dataset_repo: str, local_path: str):
 
272
  local_source = find_local_dataset_source(dataset_repo)
273
  if local_source:
274
  print(f"Found local dataset at {local_source}")
275
+ media_copied, _ = copy_dataset_files(local_source, local_path)
276
+ if media_copied > 0:
277
  return
278
+ print("Local dataset did not contain media files, falling back to remote download")
279
 
280
  repo_id = normalize_repo_id(dataset_repo)
281
 
 
285
  temp_repo_path = snapshot_download(repo_id=repo_id, repo_type="dataset")
286
  print(f"Downloaded repo to: {temp_repo_path}")
287
  print(f"Contents: {os.listdir(temp_repo_path)}")
288
+ media_copied, _ = copy_dataset_files(temp_repo_path, local_path)
289
+ if media_copied > 0:
290
  return
291
+ print("Snapshot download did not contain media files, attempting structured dataset load")
292
  except Exception as snapshot_error:
293
  print(f"Snapshot download failed: {snapshot_error}")
294