Spaces:
Sleeping
Sleeping
File size: 4,042 Bytes
0fd441a c6fb648 0fd441a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 |
# tests/test_file_handler.py
# run with pytest tests/.
import pytest
from pathlib import Path
import tempfile
from unittest.mock import patch
from utils.file_utils import (
collect_pdf_paths, collect_html_paths, collect_markdown_paths,
process_dicts_data, create_outputdir
)
@pytest.fixture
def temp_dir_with_pdfs():
    """Yield a temporary directory holding two PDF files and one non-PDF file.

    Layout created inside the temporary directory:
        doc1.pdf
        subfolder/doc2.pdf
        not_pdf.txt

    The directory is removed when the ``TemporaryDirectory`` context exits,
    i.e. after the test using this fixture has finished.
    """
    with tempfile.TemporaryDirectory() as tmpdirname:
        tmpdir = Path(tmpdirname)
        # Create sample PDF files
        (tmpdir / "doc1.pdf").touch()
        # Create only the parent folder; calling mkdir() on the file path
        # itself would turn "doc2.pdf" into a directory instead of a file.
        (tmpdir / "subfolder").mkdir(parents=True)
        (tmpdir / "subfolder" / "doc2.pdf").touch()
        (tmpdir / "not_pdf.txt").touch()
        yield tmpdir
@pytest.fixture
def temp_dir_with_html():
    """Yield a temporary directory holding two HTML files and one non-HTML file.

    Layout created inside the temporary directory:
        page1.html
        subfolder/page2.htm
        not_html.md

    The directory is removed when the ``TemporaryDirectory`` context exits,
    i.e. after the test using this fixture has finished.
    """
    with tempfile.TemporaryDirectory() as tmpdirname:
        tmpdir = Path(tmpdirname)
        (tmpdir / "page1.html").touch()
        # Create only the parent folder; calling mkdir() on the file path
        # itself would turn "page2.htm" into a directory instead of a file.
        (tmpdir / "subfolder").mkdir(parents=True)
        (tmpdir / "subfolder" / "page2.htm").touch()
        (tmpdir / "not_html.md").touch()
        yield tmpdir
@pytest.fixture
def temp_dir_with_md():
    """Yield a temporary directory holding two Markdown files and one non-Markdown file.

    Layout created inside the temporary directory:
        note1.md
        subfolder/note2.md
        not_md.pdf

    The directory is removed when the ``TemporaryDirectory`` context exits,
    i.e. after the test using this fixture has finished.
    """
    with tempfile.TemporaryDirectory() as tmpdirname:
        tmpdir = Path(tmpdirname)
        (tmpdir / "note1.md").touch()
        # Create only the parent folder; calling mkdir() on the file path
        # itself would turn "note2.md" into a directory instead of a file.
        (tmpdir / "subfolder").mkdir(parents=True)
        (tmpdir / "subfolder" / "note2.md").touch()
        (tmpdir / "not_md.pdf").touch()
        yield tmpdir
def test_collect_pdf_paths(temp_dir_with_pdfs):
    """collect_pdf_paths returns exactly the two PDF paths under the directory."""
    paths = collect_pdf_paths(str(temp_dir_with_pdfs))
    assert len(paths) == 2
    assert all(p.suffix.lower() == '.pdf' for p in paths)
    # Join on the Path yielded by the fixture; dividing a str by a str
    # raises TypeError before the membership check can run.
    assert temp_dir_with_pdfs / "doc1.pdf" in paths
    assert temp_dir_with_pdfs / "subfolder" / "doc2.pdf" in paths
def test_collect_pdf_paths_no_pdfs(temp_dir_with_html):
    """collect_pdf_paths yields nothing for the HTML fixture directory."""
    search_root = str(temp_dir_with_html)
    found = collect_pdf_paths(search_root)
    assert len(found) == 0
def test_collect_html_paths(temp_dir_with_html):
    """collect_html_paths returns exactly the .html and .htm paths under the directory."""
    paths = collect_html_paths(str(temp_dir_with_html))
    assert len(paths) == 2
    assert all(p.suffix.lower() in ('.html', '.htm') for p in paths)
    # Join on the Path yielded by the fixture; dividing a str by a str
    # raises TypeError before the membership check can run.
    assert temp_dir_with_html / "page1.html" in paths
    assert temp_dir_with_html / "subfolder" / "page2.htm" in paths
def test_collect_html_paths_no_html(temp_dir_with_pdfs):
    """collect_html_paths yields nothing for the PDF fixture directory."""
    search_root = str(temp_dir_with_pdfs)
    found = collect_html_paths(search_root)
    assert len(found) == 0
def test_collect_markdown_paths(temp_dir_with_md):
    """collect_markdown_paths returns exactly the two .md paths under the directory."""
    paths = collect_markdown_paths(str(temp_dir_with_md))
    assert len(paths) == 2
    assert all(p.suffix.lower() == '.md' for p in paths)
    # Join on the Path yielded by the fixture; dividing a str by a str
    # raises TypeError before the membership check can run.
    assert temp_dir_with_md / "note1.md" in paths
    assert temp_dir_with_md / "subfolder" / "note2.md" in paths
def test_collect_markdown_paths_no_md(temp_dir_with_pdfs):
    """collect_markdown_paths yields nothing for the PDF fixture directory."""
    search_root = str(temp_dir_with_pdfs)
    found = collect_markdown_paths(search_root)
    assert len(found) == 0
def test_process_dicts_data():
    """process_dicts_data output contains the file name, content, image path and error text."""
    entry_with_image = {"filepath": Path("file1.md"), "markdown": "Content1", "image_path": ["img1.jpg"]}
    entry_without_image = {"filepath": Path("file2.md"), "markdown": "Content2", "image_path": []}
    failed_entry = {"error": "Conversion failed for file3"}

    rendered = process_dicts_data([entry_with_image, entry_without_image, failed_entry])

    # Each fragment must show up somewhere in the processed result.
    for fragment in ("file1.md", "Content1", "img1.jpg", "Conversion failed"):
        assert fragment in rendered
def test_process_dicts_data_empty():
    """An empty list of entries is processed into an empty string."""
    no_entries = []
    assert process_dicts_data(no_entries) == ""
def test_process_dicts_data_invalid():
    """An entry with none of the expected keys makes process_dicts_data raise ValueError."""
    malformed_entries = [{"invalid": "data"}]
    with pytest.raises(ValueError):
        process_dicts_data(malformed_entries)
def test_create_outputdir(tmp_path):
    """create_outputdir creates the requested directory when it is missing."""
    target = tmp_path.joinpath("test_output")
    create_outputdir(str(target))
    assert target.exists()
    assert target.is_dir()
def test_create_outputdir_existing(tmp_path):
    """create_outputdir leaves an already existing directory in place."""
    target = tmp_path.joinpath("test_output")
    target.mkdir()  # pre-create so the call below hits an existing directory
    create_outputdir(str(target))
    assert target.exists()
    assert target.is_dir()
@patch('pathlib.Path.mkdir')
def test_create_outputdir_error(mock_mkdir):
    """An OSError raised by Path.mkdir propagates out of create_outputdir."""
    simulated_failure = OSError("Permission denied")
    mock_mkdir.side_effect = simulated_failure
    with pytest.raises(OSError):
        create_outputdir("protected_dir")