import tempfile
import zipfile
from typing import Optional
from pathlib import Path

from dataset import ImageDataset


def _unique_entry_names(stem, suffix, image_prefix, label_prefix, taken):
    """Return an (image, label) archive-name pair not already in *taken*.

    A numeric ``_N`` suffix is appended to *stem* until neither the image
    entry nor its ``.txt`` label entry collides with a name already written.
    """
    candidate = stem
    counter = 0
    while True:
        image_name = f"{image_prefix}{candidate}{suffix}"
        label_name = f"{label_prefix}{candidate}.txt"
        if image_name not in taken and label_name not in taken:
            return image_name, label_name
        counter += 1
        candidate = f"{stem}_{counter}"


def create_dataset_zip(
    dataset: ImageDataset, organize_in_folders: bool = True
) -> Optional[str]:
    """Create a zip file containing images and their text labels.

    Each entry of ``dataset.images`` is read as a mapping with a ``"path"``
    (image file location) and a ``"label"`` (text written to a ``.txt`` file
    sharing the image's stem). Images whose file is missing are skipped
    silently; any other per-image failure is printed and skipped. An image
    and its label are added together or not at all. When two images would
    produce the same archive name (same basename from different folders, or
    same stem with different extensions), later ones get a ``_1``, ``_2``,
    ... suffix so no entry overwrites another on extraction.

    Args:
        dataset: ImageDataset containing images with labels
        organize_in_folders: If True, puts images in 'images/' folder and
            texts in 'labels/' folder. If False, puts all files in the
            root folder.

    Returns:
        str: Path to the created zip file, or None if the dataset is empty
        or the archive could not be created. The archive may contain no
        entries if every image was skipped.
    """
    if not dataset.images:
        return None

    # The handle is only used to reserve a unique path; delete=False keeps
    # the file on disk so ZipFile can reopen it by name after we close it.
    with tempfile.NamedTemporaryFile(
        delete=False, prefix="image-dataset", suffix=".zip"
    ) as temp_file:
        temp_path = temp_file.name

    if organize_in_folders:
        image_prefix, label_prefix = "images/", "labels/"
    else:
        image_prefix, label_prefix = "", ""

    # Archive names already written, used to avoid duplicate entries.
    taken = set()

    try:
        with zipfile.ZipFile(temp_path, "w", zipfile.ZIP_DEFLATED) as zip_file:
            for image_data in dataset.images:
                image_path = image_data["path"]
                label = image_data["label"]

                try:
                    source = Path(image_path)

                    # Prepare both payloads before touching the archive so a
                    # failure here cannot leave an image without its label.
                    with open(image_path, "rb") as img_file:
                        image_bytes = img_file.read()
                    label_bytes = label.encode("utf-8")

                    image_zip_path, text_zip_path = _unique_entry_names(
                        source.stem,
                        source.suffix,
                        image_prefix,
                        label_prefix,
                        taken,
                    )

                    zip_file.writestr(image_zip_path, image_bytes)
                    zip_file.writestr(text_zip_path, label_bytes)
                    taken.update((image_zip_path, text_zip_path))

                except FileNotFoundError:
                    # Skip if image file doesn't exist
                    continue
                except Exception as e:
                    # Log error but continue with other files
                    print(f"Error processing {image_path}: {e}")
                    continue

        return temp_path

    except Exception as e:
        # Clean up temp file if creation failed, and say why instead of
        # failing silently.
        print(f"Error creating dataset zip: {e}")
        Path(temp_path).unlink(missing_ok=True)
        return None