Spaces:
No application file
No application file
| import pdfplumber | |
| from pathlib import Path | |
| import os | |
| input_path = Path("./documents") | |
| file_names = os.listdir(input_path) | |
| result = "" | |
| for file_name in file_names: | |
| pdf = pdfplumber.open(input_path / file_name) | |
| for page in pdf.pages: | |
| text = page.extract_text() | |
| result += text | |
| # encoding to ASCII will remove special caracters. | |
| result = result.encode(encoding="ASCII", errors="ignore").decode() | |
| with open(input_path / "result.txt", "w") as f: | |
| f.write(result) | |