Spaces:
Running
on
Zero
Running
on
Zero
File size: 2,474 Bytes
6904fc7 8101fd3 6904fc7 db31bbc 6904fc7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 |
import tempfile
import zipfile
from typing import Optional
from pathlib import Path
from dataset import ImageDataset
def create_dataset_zip(
    dataset: "ImageDataset", organize_in_folders: bool = True
) -> Optional[str]:
    """Create a zip file containing images and their text labels.

    Every image that can be read is stored together with a ``.txt`` file
    holding its label (UTF-8 encoded). An image and its label are always
    added as a pair: if the record is malformed, the label cannot be
    encoded, or the image cannot be read, neither entry is written and
    processing continues with the next record.

    Archive names are derived from the image's base name. When two records
    would map to the same archive name (e.g. ``a/cat.png`` and
    ``b/cat.png``, or ``cat.png`` and ``cat.jpg`` which share ``cat.txt``),
    later ones get a numeric suffix (``cat_1``, ``cat_2``, ...) so no entry
    shadows another on extraction.

    Args:
        dataset: Object exposing ``images``, an iterable of mappings with
            a ``"path"`` (image file path) and a ``"label"`` (str) key.
        organize_in_folders: If True, puts images in 'images/' folder and
            texts in 'labels/' folder. If False, puts all files in the
            root folder.

    Returns:
        Path to the created zip file, or None if the dataset is empty or
        the archive itself could not be created. The caller owns the
        returned file and is responsible for deleting it.
    """
    if not dataset.images:
        return None

    if organize_in_folders:
        image_dir, label_dir = "images/", "labels/"
    else:
        image_dir, label_dir = "", ""

    # Reserve a path on disk; the handle is closed right away so that
    # zipfile can reopen the path itself (required on Windows).
    temp_file = tempfile.NamedTemporaryFile(
        delete=False, prefix="image-dataset", suffix=".zip"
    )
    temp_path = temp_file.name
    temp_file.close()

    # Archive names already written, used to avoid duplicate zip members.
    used_names = set()

    try:
        with zipfile.ZipFile(temp_path, "w", zipfile.ZIP_DEFLATED) as zip_file:
            for image_data in dataset.images:
                # Pre-bind so the error message below never hits an
                # unbound name when the record itself is malformed.
                image_path = None
                try:
                    image_path = image_data["path"]
                    # Encode the label before touching the archive so a bad
                    # label cannot leave an image without its label file.
                    label_bytes = image_data["label"].encode("utf-8")

                    source = Path(image_path)
                    stem = source.stem
                    counter = 0
                    while True:
                        image_zip_path = f"{image_dir}{stem}{source.suffix}"
                        text_zip_path = f"{label_dir}{stem}.txt"
                        if (
                            image_zip_path not in used_names
                            and text_zip_path not in used_names
                        ):
                            break
                        counter += 1
                        stem = f"{source.stem}_{counter}"

                    # Read the image fully before writing anything, so a
                    # read failure adds no entry at all.
                    with open(image_path, "rb") as img_file:
                        image_bytes = img_file.read()

                    zip_file.writestr(image_zip_path, image_bytes)
                    zip_file.writestr(text_zip_path, label_bytes)
                    used_names.update((image_zip_path, text_zip_path))
                except FileNotFoundError:
                    # Skip if image file doesn't exist
                    continue
                except Exception as e:
                    # Log error but continue with other files
                    print(f"Error processing {image_path}: {e}")
                    continue
        return temp_path
    except Exception:
        # Clean up temp file if creation failed
        Path(temp_path).unlink(missing_ok=True)
        return None
|