auto-labelizer / packager.py
Metal3d's picture
Add prefix to the zip files
8101fd3
import tempfile
import zipfile
from typing import Optional
from pathlib import Path
from dataset import ImageDataset
def create_dataset_zip(
    dataset: ImageDataset, organize_in_folders: bool = True
) -> Optional[str]:
    """Create a zip file containing images and their text labels.

    Each entry of ``dataset.images`` is read as a mapping with a ``"path"``
    key (image file location) and a ``"label"`` key (caption text). Every
    image is stored next to a UTF-8 ``.txt`` file that shares its stem.

    Entries are added atomically: the image bytes and the encoded label are
    both prepared before anything is written, so an entry whose label cannot
    be encoded never leaves an unlabeled image in the archive.

    When several entries would map to the same archive name (same file name
    coming from different directories, or the same stem with different
    extensions), later entries get a numeric suffix (``name_1``, ``name_2``,
    ...) applied to both the image and its label so pairs stay matched.

    Args:
        dataset: ImageDataset containing images with labels.
        organize_in_folders: If True, puts images in 'images/' folder and
            texts in 'labels/' folder. If False, puts all files in the root
            folder.

    Returns:
        str: Path to the created zip file, or None if the dataset is empty
        or the archive could not be created.
    """
    if not dataset.images:
        return None

    # delete=False keeps the file on disk once the handle is closed; only the
    # reserved path is needed so zipfile can reopen it for writing.
    with tempfile.NamedTemporaryFile(
        delete=False, prefix="image-dataset", suffix=".zip"
    ) as temp_file:
        temp_path = temp_file.name

    # Stems already present in the archive, used to disambiguate collisions.
    used_stems = set()

    try:
        with zipfile.ZipFile(temp_path, "w", zipfile.ZIP_DEFLATED) as zip_file:
            for image_data in dataset.images:
                image_path = image_data["path"]
                label = image_data["label"]
                try:
                    # Prepare both payloads before touching the archive so a
                    # failure here cannot leave a half-written entry behind.
                    with open(image_path, "rb") as img_file:
                        image_bytes = img_file.read()
                    label_bytes = label.encode("utf-8")

                    # Pick a stem that no earlier entry has used.
                    source = Path(image_path)
                    stem = source.stem
                    counter = 0
                    while stem in used_stems:
                        counter += 1
                        stem = f"{source.stem}_{counter}"
                    used_stems.add(stem)

                    # Determine file paths based on organization option
                    image_filename = f"{stem}{source.suffix}"
                    text_filename = f"{stem}.txt"
                    if organize_in_folders:
                        image_zip_path = f"images/{image_filename}"
                        text_zip_path = f"labels/{text_filename}"
                    else:
                        image_zip_path = image_filename
                        text_zip_path = text_filename

                    zip_file.writestr(image_zip_path, image_bytes)
                    zip_file.writestr(text_zip_path, label_bytes)
                except FileNotFoundError:
                    # Skip if image file doesn't exist
                    continue
                except Exception as e:
                    # Log error but continue with other files
                    print(f"Error processing {image_path}: {e}")
                    continue
        return temp_path
    except Exception as e:
        # Report the failure, then clean up the temp file so nothing leaks.
        print(f"Error creating dataset zip: {e}")
        Path(temp_path).unlink(missing_ok=True)
        return None