Spaces:
Sleeping
Sleeping
| # tests/test_file_handler.py | |
| # run with pytest tests/. | |
| import pytest | |
| from pathlib import Path | |
| import tempfile | |
| from unittest.mock import patch | |
| from utils.file_utils import ( | |
| collect_pdf_paths, collect_html_paths, collect_markdown_paths, | |
| process_dicts_data, create_outputdir | |
| ) | |
| def temp_dir_with_pdfs(): | |
| with tempfile.TemporaryDirectory() as tmpdirname: | |
| tmpdir = Path(tmpdirname) | |
| # Create sample PDF files | |
| (tmpdir / "doc1.pdf").touch() | |
| (tmpdir / "subfolder/doc2.pdf").mkdir(parents=True) | |
| (tmpdir / "subfolder/doc2.pdf").touch() | |
| (tmpdir / "not_pdf.txt").touch() | |
| yield tmpdir | |
| def temp_dir_with_html(): | |
| with tempfile.TemporaryDirectory() as tmpdirname: | |
| tmpdir = Path(tmpdirname) | |
| (tmpdir / "page1.html").touch() | |
| (tmpdir / "subfolder/page2.htm").mkdir(parents=True) | |
| (tmpdir / "subfolder/page2.htm").touch() | |
| (tmpdir / "not_html.md").touch() | |
| yield tmpdir | |
| def temp_dir_with_md(): | |
| with tempfile.TemporaryDirectory() as tmpdirname: | |
| tmpdir = Path(tmpdirname) | |
| (tmpdir / "note1.md").touch() | |
| (tmpdir / "subfolder/note2.md").mkdir(parents=True) | |
| (tmpdir / "subfolder/note2.md").touch() | |
| (tmpdir / "not_md.pdf").touch() | |
| yield tmpdir | |
| def test_collect_pdf_paths(temp_dir_with_pdfs): | |
| paths = collect_pdf_paths(str(temp_dir_with_pdfs)) | |
| assert len(paths) == 2 | |
| assert all(p.suffix.lower() == '.pdf' for p in paths) | |
| assert Path(str(temp_dir_with_pdfs) / "doc1.pdf") in paths | |
| assert Path(str(temp_dir_with_pdfs) / "subfolder/doc2.pdf") in paths | |
| def test_collect_pdf_paths_no_pdfs(temp_dir_with_html): | |
| paths = collect_pdf_paths(str(temp_dir_with_html)) | |
| assert len(paths) == 0 | |
| def test_collect_html_paths(temp_dir_with_html): | |
| paths = collect_html_paths(str(temp_dir_with_html)) | |
| assert len(paths) == 2 | |
| assert all(p.suffix.lower() in ['.html', '.htm'] for p in paths) | |
| assert Path(str(temp_dir_with_html) / "page1.html") in paths | |
| assert Path(str(temp_dir_with_html) / "subfolder/page2.htm") in paths | |
| def test_collect_html_paths_no_html(temp_dir_with_pdfs): | |
| paths = collect_html_paths(str(temp_dir_with_pdfs)) | |
| assert len(paths) == 0 | |
| def test_collect_markdown_paths(temp_dir_with_md): | |
| paths = collect_markdown_paths(str(temp_dir_with_md)) | |
| assert len(paths) == 2 | |
| assert all(p.suffix.lower() == '.md' for p in paths) | |
| assert Path(str(temp_dir_with_md) / "note1.md") in paths | |
| assert Path(str(temp_dir_with_md) / "subfolder/note2.md") in paths | |
| def test_collect_markdown_paths_no_md(temp_dir_with_pdfs): | |
| paths = collect_markdown_paths(str(temp_dir_with_pdfs)) | |
| assert len(paths) == 0 | |
| def test_process_dicts_data(): | |
| sample_logs = [ | |
| {"filepath": Path("file1.md"), "markdown": "Content1", "image_path": ["img1.jpg"]}, | |
| {"filepath": Path("file2.md"), "markdown": "Content2", "image_path": []}, | |
| {"error": "Conversion failed for file3"} | |
| ] | |
| result = process_dicts_data(sample_logs) | |
| assert "file1.md" in result | |
| assert "Content1" in result | |
| assert "img1.jpg" in result | |
| assert "Conversion failed" in result | |
| def test_process_dicts_data_empty(): | |
| result = process_dicts_data([]) | |
| assert result == "" | |
| def test_process_dicts_data_invalid(): | |
| with pytest.raises(ValueError): | |
| process_dicts_data([{"invalid": "data"}]) | |
| def test_create_outputdir(tmp_path): | |
| output_dir = tmp_path / "test_output" | |
| create_outputdir(str(output_dir)) | |
| assert output_dir.exists() | |
| assert output_dir.is_dir() | |
| def test_create_outputdir_existing(tmp_path): | |
| output_dir = tmp_path / "test_output" | |
| output_dir.mkdir() | |
| create_outputdir(str(output_dir)) | |
| assert output_dir.exists() | |
| assert output_dir.is_dir() | |
| def test_create_outputdir_error(mock_mkdir): | |
| mock_mkdir.side_effect = OSError("Permission denied") | |
| with pytest.raises(OSError): | |
| create_outputdir("protected_dir") |