Spaces:
No application file
No application file
import PyPDF2
from docx import Document
def extract_text_from_file(uploaded_file) -> str:
    """Extract plain text from an uploaded PDF or DOCX file.

    The parser is chosen from ``uploaded_file.type``, which is compared
    against the PDF and DOCX MIME type strings.

    Args:
        uploaded_file: Object exposing a ``type`` attribute and accepted by
            ``PyPDF2.PdfReader`` (PDF) or ``docx.Document`` (DOCX).
            NOTE(review): presumably a Streamlit ``UploadedFile`` - confirm
            against the caller.

    Returns:
        The text of every PDF page, or of every DOCX paragraph, joined with
        newlines. An empty string is returned for any other ``type`` value.
    """
    docx_mime = (
        "application/vnd.openxmlformats-officedocument"
        ".wordprocessingml.document"
    )
    mime_type = uploaded_file.type

    if mime_type == "application/pdf":
        pdf_reader = PyPDF2.PdfReader(uploaded_file)
        # Defensive: if a page yields None instead of a string (e.g. a page
        # with no extractable text on some library versions - TODO confirm),
        # "\n".join would raise TypeError; treat such pages as empty text.
        return "\n".join(
            page.extract_text() or "" for page in pdf_reader.pages
        )

    if mime_type == docx_mime:
        doc = Document(uploaded_file)
        return "\n".join(para.text for para in doc.paragraphs)

    # Unsupported type: keep the original contract of returning "".
    return ""