# Spaces: Runtime error
| import gradio as gr | |
| from PyPDF2 import PdfReader | |
| from concurrent.futures import ThreadPoolExecutor | |
def convert_pdf_to_text(pdf_file):
    """Extract the text of a single PDF and wrap it in a delimited section.

    Args:
        pdf_file: Either a filesystem path given as a ``str`` or an object
            that exposes the path through a ``.name`` attribute (for example
            an uploaded temporary file).

    Returns:
        A string made of a ``---`` separator line, a ``file name:`` header
        carrying the path, the concatenated text of every page, and a
        closing ``---`` separator line.

    Raises:
        ValueError: If the path does not end in ``.pdf`` (compared
            case-insensitively).
    """
    # Accept plain string paths as well as upload objects carrying `.name`.
    path = pdf_file if isinstance(pdf_file, str) else pdf_file.name
    print(path)

    # Compare case-insensitively so "REPORT.PDF" is not rejected.
    if not path.lower().endswith(".pdf"):
        raise ValueError("Invalid file format. Please upload PDF files only.")

    with open(path, "rb") as file:
        pdf_reader = PdfReader(file)
        # Defensive `or ""`: keeps the join safe should a page yield no text.
        page_texts = [page.extract_text() or "" for page in pdf_reader.pages]

    header = f"file name: {path}\n content: \n"
    return "".join(["\n---\n", header, *page_texts, "\n---\n"])
def pdf_to_text(pdf_files):
    """Convert several PDFs to text concurrently and join the results.

    Args:
        pdf_files: An iterable of items accepted by ``convert_pdf_to_text``.
            ``None`` or an empty collection (nothing uploaded) is allowed.

    Returns:
        The per-file text sections joined with newlines, in input order, or
        an empty string when there are no files.
    """
    # Nothing uploaded: return early instead of failing on iteration of None.
    if not pdf_files:
        return ""

    with ThreadPoolExecutor() as executor:
        # Materialise inside the `with` block so any exception raised by a
        # worker surfaces here; `map` yields results in input order.
        sections = list(executor.map(convert_pdf_to_text, pdf_files))

    return "\n".join(sections)
# Web UI: a multi-file upload wired to pdf_to_text, with plain-text output.
iface = gr.Interface(
    fn=pdf_to_text,
    # Use the top-level gr.File component: the legacy `gr.inputs` namespace is
    # deprecated in Gradio 3.x and, to my knowledge, absent from 4.x.
    # `type` is left at the library default so each uploaded item should still
    # expose its path via `.name` on both major versions.
    # NOTE(review): confirm against the Gradio version pinned for this app.
    inputs=gr.File(label="Upload a PDF file", file_count="multiple"),
    outputs="text",
    title="PDF to Text Converter",
    description="Upload PDF files and get their content in text format.",
)

if __name__ == "__main__":
    iface.launch()