Spaces:
Running
Running
| """Helper script to go from PDF to PNG ZIP Files we can use in HTML on the LB.""" | |
| from __future__ import annotations | |
| import glob | |
| import os | |
| import zipfile | |
| from pathlib import Path | |
| from pdf2image import convert_from_path | |
# Folder that is searched recursively for PDF files to convert.
root_dir = "./data"
pdf_paths = glob.glob(os.path.join(root_dir, "**", "*.pdf"), recursive=True)

for pdf_path in pdf_paths:
    # Absolute paths: the PNG(s) and the ZIP are written next to the source
    # PDF, so the existing folder structure is kept as-is.
    path_to_pdf = Path(pdf_path).resolve()
    path_to_png = path_to_pdf.with_suffix(".png")
    path_to_zip = path_to_pdf.with_suffix(".png.zip")

    print(f"Converting {pdf_path}...")
    images = convert_from_path(pdf_path, dpi=800)
    if not images:
        # Nothing was rendered, so there is nothing to archive. Keep the PDF
        # rather than deleting the only copy of the document.
        print(f"No pages rendered for {pdf_path}, keeping the PDF.")
        continue

    # One archive per PDF that holds every page. Opening the archive (mode
    # "w" truncates) and saving to a single PNG path inside the page loop
    # would leave only the last page of a multi-page PDF behind.
    single_page = len(images) == 1
    with zipfile.ZipFile(path_to_zip, "w") as zipf:
        for page_number, image in enumerate(images, start=1):
            if single_page:
                # Keep the historical member name "<stem>.png" for the
                # common one-page case so existing consumers still work.
                page_png = path_to_png
            else:
                page_png = path_to_png.with_name(
                    f"{path_to_png.stem}-{page_number}.png"
                )
            image.save(page_png, "PNG")
            try:
                zipf.write(page_png, arcname=page_png.name)
            finally:
                # The PNG on disk is only a staging file for the archive.
                page_png.unlink(missing_ok=True)

    # Remove the source only after the archive has been written and closed.
    path_to_pdf.unlink(missing_ok=True)