# Spaces: Runtime error
# File size: 1,307 Bytes
# 119e740
import gradio as gr
from PyPDF2 import PdfReader
from concurrent.futures import ThreadPoolExecutor
def convert_pdf_to_text(pdf_file):
    """Extract the text of one PDF and wrap it in a labelled section.

    Args:
        pdf_file: Either a filesystem path given as a ``str``, or an object
            whose ``name`` attribute holds that path.

    Returns:
        A string made of a ``---`` separator line, a ``file name: <path>``
        header, the text of every page concatenated in page order, and a
        closing ``---`` separator line.

    Raises:
        ValueError: If the path does not end in ``.pdf`` (compared
            case-insensitively).
    """
    # Accept plain string paths as well as objects carrying the path in
    # ``.name``, so the function works with either style of caller.
    path = pdf_file if isinstance(pdf_file, str) else pdf_file.name
    print(path)

    # Case-insensitive comparison so names such as "REPORT.PDF" are accepted.
    if not path.lower().endswith(".pdf"):
        raise ValueError("Invalid file format. Please upload PDF files only.")

    with open(path, "rb") as file:
        pdf_reader = PdfReader(file)
        # Pages are extracted while the stream handed to the reader is still
        # open. ``or ""`` is a defensive guard in case extract_text() yields
        # nothing for a page (e.g. image-only pages) -- TODO confirm whether
        # the installed PyPDF2 version can return None here.
        page_texts = [page.extract_text() or "" for page in pdf_reader.pages]

    header = f"file name: {path}\n content: \n"
    return "\n---\n" + header + "".join(page_texts) + "\n---\n"
def pdf_to_text(pdf_files):
    """Convert every given PDF to text and join the results.

    Args:
        pdf_files: Iterable of uploads, each of which is passed to
            ``convert_pdf_to_text``. May be ``None`` or empty when nothing
            was uploaded.

    Returns:
        The per-file text sections joined with newlines, in the same order
        as ``pdf_files``; an empty string when there are no files.
    """
    # Nothing uploaded: return early instead of handing None to
    # executor.map, which would raise TypeError.
    if not pdf_files:
        return ""

    # Convert the files in parallel; executor.map yields results in input
    # order. list() drains the results while the pool is still open.
    with ThreadPoolExecutor() as executor:
        sections = list(executor.map(convert_pdf_to_text, pdf_files))

    return "\n".join(sections)
# Web UI: takes any number of uploaded files, runs them through pdf_to_text
# and shows the combined text.
iface = gr.Interface(
    fn=pdf_to_text,
    # gr.File replaces the legacy gr.inputs.File: the gr.inputs namespace was
    # deprecated in Gradio 3 and removed in Gradio 4. The explicit
    # type="file" is dropped because Gradio 4 no longer accepts that value;
    # the component's default is used instead.
    # NOTE(review): confirm against the Gradio version pinned for this app
    # that uploaded items still expose the path via ``.name``.
    inputs=gr.File(label="Upload a PDF file", file_count="multiple"),
    outputs="text",
    title="PDF to Text Converter",
    description="Upload PDF files and get their content in text format.",
)

# Start the web server only when the file is run as a script.
if __name__ == "__main__":
    iface.launch()
|