Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from PyPDF2 import PdfReader | |
def read_pdf(*inps):
    """Extract text from a page range of a PDF and post-process each page.

    Called positionally by the Gradio click handler with four values, in
    this order:

    Args:
        pdf_file: The uploaded PDF as provided by the ``gr.File`` input; it
            is passed unchanged to ``PdfReader``. ``None`` when nothing has
            been uploaded.
        start_index: Zero-based index of the first page to read.
        end_index: Zero-based index of the last page to read (inclusive).
        text_rule: Optional ``;``-separated Python expressions. Each one is
            evaluated with the current page text bound to ``sub``; its
            result becomes the new ``sub``.

    Returns:
        The processed text of the selected pages concatenated in page
        order, or ``""`` when no PDF was supplied.
    """
    pdf_file, start_index, end_index, text_rule = inps

    # Nothing uploaded yet: return an empty result instead of letting
    # PdfReader fail on None.
    if pdf_file is None:
        return ""

    # Split the rule string once (it does not change between pages) and
    # drop blank fragments such as the one after a trailing ';' or a
    # whitespace-only piece, which eval() would reject with SyntaxError.
    rules = []
    for fragment in (text_rule or "").split(";"):
        fragment = fragment.strip()
        if fragment:
            rules.append(fragment)

    reader = PdfReader(pdf_file)
    chunks = []
    for page in reader.pages[int(start_index):int(end_index) + 1]:
        sub = page.extract_text()
        for rule in rules:
            # SECURITY: eval() executes arbitrary Python typed into the
            # "rule" textbox, so anyone who can reach this app can run code
            # on the host. Kept for backward compatibility with existing
            # rules; replace with a whitelist of string operations before
            # exposing this publicly.
            # This must stay a plain loop inside the function: eval() reads
            # the local name `sub` from this frame.
            sub = eval(rule)
        chunks.append(sub)
    return "".join(chunks)
# UI definition: a PDF upload, a page range, an optional post-processing
# rule and a submit button; the extracted text is written to an editable
# textbox.
# NOTE(review): the indentation of this section was lost in the copy this
# was restored from; the Row/Column nesting below is a reconstruction --
# confirm the layout against the running app.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            pdf_file = gr.File(label="PDF", interactive=True)
            with gr.Row():
                start_index = gr.Number(label="start_page", value=0)
                end_index = gr.Number(label="end_page", value=0)
            text_rule = gr.Textbox(label="rule", value='sub.replace(" ", " ");')
            submit = gr.Button(value="submit")
        text_output = gr.Textbox(interactive=True)

    # The order here must match the tuple unpacking at the top of read_pdf.
    inputs = [pdf_file, start_index, end_index, text_rule]
    submit.click(fn=read_pdf, inputs=inputs, outputs=text_output)

# Only start the server when executed as a script, so importing this module
# (e.g. from a test or another tool) does not launch the app.
if __name__ == "__main__":
    demo.launch()