import os

from pypdf import PdfReader

from config.rag_config import RAGConfig


def load_pdfs(pdf_dir=None):
    """Extract the text of every ``.pdf`` file found directly in a directory.

    Args:
        pdf_dir: Directory to scan. When falsy (``None`` or empty), the
            ``pdf_dir`` attribute of a fresh ``RAGConfig()`` is used instead.

    Returns:
        A list with one string per PDF file. Each string is the text of the
        file's pages joined with newlines; a page whose ``extract_text()``
        result is falsy contributes an empty string. Files are visited in
        sorted filename order so the result is reproducible.
    """
    pdf_dir = pdf_dir or RAGConfig().pdf_dir
    texts = []
    # os.listdir() yields entries in arbitrary order; sort so that the
    # position of each document in the result is stable between runs.
    for filename in sorted(os.listdir(pdf_dir)):
        if not filename.endswith(".pdf"):
            continue
        reader = PdfReader(os.path.join(pdf_dir, filename))
        texts.append(
            "\n".join(page.extract_text() or "" for page in reader.pages)
        )
    return texts


def chunk_text(text, chunk_size=500, overlap=100):
    """Split ``text`` into consecutive, overlapping slices.

    Slices start every ``chunk_size - overlap`` positions and are at most
    ``chunk_size`` long, so neighbouring chunks share ``overlap`` characters.
    The final chunk may be shorter than ``chunk_size``.

    Args:
        text: The sequence (normally a string) to split.
        chunk_size: Maximum length of each chunk.
        overlap: Number of trailing characters of a chunk that are repeated
            at the start of the next one. Must be smaller than ``chunk_size``.

    Returns:
        The list of chunks in order; an empty list when ``text`` is empty.

    Raises:
        ValueError: If ``chunk_size - overlap`` is not positive. Without this
            check the window would never advance and the function would loop
            forever while accumulating chunks.
    """
    step = chunk_size - overlap
    if step <= 0:
        raise ValueError(
            f"chunk_size ({chunk_size}) must be greater than overlap ({overlap})"
        )
    return [
        text[start:start + chunk_size]
        for start in range(0, len(text), step)
    ]