lvvignesh2122 committed
Commit 0b8a777 · 1 Parent(s): cff5ba8

Enhance RAG: Fix 429s, Add Persistence & Validation

Files changed (4):
  1. .gitignore (+1 / -0)
  2. frontend/index.html (+91 / -66)
  3. main.py (+176 / -74)
  4. rag_store.py (+141 / -33)
.gitignore CHANGED
@@ -14,6 +14,7 @@ venv/
 data/
 *.npy
 *.index
+*.pkl
 
 # OS / editor
 .vscode/
frontend/index.html CHANGED
@@ -5,19 +5,19 @@
   <title>Gemini RAG Assistant</title>
   <meta name="viewport" content="width=device-width, initial-scale=1.0" />
 
-  <!-- Fonts -->
   <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap" rel="stylesheet">
 
   <style>
     :root {
      --bg: radial-gradient(1200px 600px at top, #e0e7ff 0%, #f8fafc 60%);
-      --card: rgba(255,255,255,0.85);
+      --card: rgba(255,255,255,0.9);
      --border: rgba(15,23,42,0.08);
      --primary: #4f46e5;
      --secondary: #0ea5e9;
      --text: #0f172a;
      --muted: #64748b;
      --error: #dc2626;
+      --success: #16a34a;
     }
 
     * { box-sizing: border-box; font-family: Inter, sans-serif; }
@@ -34,7 +34,7 @@
 
     .container {
      width: 100%;
-      max-width: 980px;
+      max-width: 800px;
      background: var(--card);
      backdrop-filter: blur(16px);
      border-radius: 24px;
@@ -78,10 +78,11 @@
      border-radius: 14px;
      border: 1px solid var(--border);
      font-size: 0.95rem;
+      background: #fafafa;
     }
 
     textarea {
-      min-height: 120px;
+      min-height: 100px;
      resize: vertical;
     }
 
@@ -93,7 +94,7 @@
     }
 
     button {
-      padding: 12px 18px;
+      padding: 12px 24px;
      border-radius: 14px;
      border: none;
      background: var(--primary);
@@ -112,7 +113,7 @@
 
     button:hover:not(:disabled) {
      transform: translateY(-1px);
-      box-shadow: 0 10px 25px rgba(79,70,229,.35);
+      box-shadow: 0 4px 12px rgba(79,70,229,.2);
     }
 
     .status {
@@ -123,18 +124,36 @@
 
     .answer {
      margin-top: 24px;
-      padding: 20px;
+      padding: 22px;
      border-radius: 16px;
-      background: #f8fafc;
+      background: #fff;
      border: 1px solid var(--border);
-      white-space: pre-wrap;
      line-height: 1.6;
+      box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.05);
     }
 
-    .error {
-      color: var(--error);
-      margin-top: 10px;
-      font-weight: 500;
+    .confidence-badge {
+      display: inline-block;
+      margin-top: 12px;
+      padding: 4px 12px;
+      border-radius: 20px;
+      background: #dcfce7;
+      color: #166534;
+      font-size: 0.8rem;
+      font-weight: 600;
+    }
+
+    .citations {
+      margin-top: 16px;
+      font-size: .85rem;
+      color: var(--muted);
+      border-top: 1px solid var(--border);
+      padding-top: 12px;
+    }
+
+    .citations ul {
+      margin: 6px 0 0;
+      padding-left: 20px;
     }
 
     .loader {
@@ -148,51 +167,35 @@
      50% { opacity: 1 }
      100% { opacity: .4 }
     }
-
-    footer {
-      text-align: center;
-      margin-top: 28px;
-      font-size: .8rem;
-      color: var(--muted);
-    }
   </style>
 </head>
 
 <body>
-  <div class="container">
-    <h1>Gemini RAG Assistant</h1>
-    <div class="subtitle">
-      Upload documents · Ask questions · Get grounded answers
-    </div>
-
-    <!-- Upload -->
-    <div class="card">
-      <h3>📄 Upload documents</h3>
-      <input type="file" id="files" multiple />
-      <div class="row">
-        <button id="uploadBtn" onclick="upload()">Upload & Index</button>
-      </div>
-      <div id="uploadStatus" class="status"></div>
-    </div>
-
-    <!-- Ask -->
-    <div class="card">
-      <h3>💬 Ask or summarize</h3>
-      <textarea id="question" placeholder="Ask something about your documents…"></textarea>
-      <div class="row">
-        <button id="askBtn" onclick="ask()">Ask</button>
-        <button class="secondary" id="sumBtn" onclick="summarize()">Summarize</button>
-      </div>
-    </div>
-
-    <!-- Answer -->
-    <div id="answerBox" class="answer" style="display:none;"></div>
-    <div id="errorBox" class="error"></div>
-
-    <footer>
-      Built with FastAPI · FAISS · Gemini
-    </footer>
-  </div>
+  <div class="container">
+    <h1>Gemini RAG Assistant</h1>
+    <div class="subtitle">Upload documents · Ask questions · Get grounded answers</div>
+
+    <div class="card">
+      <h3>1. Upload Knowledge</h3>
+      <input type="file" id="files" multiple accept=".pdf,.txt"/>
+      <div class="row">
+        <button id="uploadBtn" onclick="upload()">Upload & Index Files</button>
+      </div>
+      <div id="uploadStatus" class="status"></div>
+    </div>
+
+    <div class="card">
+      <h3>2. Ask or Summarize</h3>
+      <textarea id="question" placeholder="E.g., 'What are the main risks?' or 'Summarize the document'"></textarea>
+      <div class="row">
+        <button id="askBtn" onclick="ask()">Ask Question</button>
+        <button class="secondary" id="sumBtn" onclick="summarize()">Generate Summary</button>
+      </div>
+    </div>
+
+    <div id="answerBox" class="answer" style="display:none;"></div>
+  </div>
 
   <script>
     let busy = false;
@@ -206,29 +209,37 @@
 
     async function upload() {
      const files = document.getElementById("files").files;
-      if (!files.length) return;
+      if (!files.length) {
+        alert("Please select files first.");
+        return;
+      }
 
      setBusy(true);
-      document.getElementById("uploadStatus").innerText = "Indexing documents…";
+      const statusDiv = document.getElementById("uploadStatus");
+      statusDiv.innerText = "Indexing documents... this may take a moment.";
 
      const fd = new FormData();
      for (let f of files) fd.append("files", f);
 
-      const res = await fetch("/upload", { method: "POST", body: fd });
-      const data = await res.json();
-
-      document.getElementById("uploadStatus").innerText = data.message || "Done ✅";
+      try {
+        const res = await fetch("/upload", { method: "POST", body: fd });
+        if (!res.ok) throw new Error("Upload failed");
+        const data = await res.json();
+        statusDiv.innerText = data.message || "Done ✅";
+      } catch (e) {
+        statusDiv.innerText = "Error uploading files.";
+      }
      setBusy(false);
     }
 
     async function ask() {
      const q = document.getElementById("question").value.trim();
-      if (!q || busy) return;
+      if (!q) return;
 
      setBusy(true);
-      document.getElementById("errorBox").innerText = "";
-      document.getElementById("answerBox").style.display = "block";
-      document.getElementById("answerBox").innerHTML = "<span class='loader'>Thinking…</span>";
+      const box = document.getElementById("answerBox");
+      box.style.display = "block";
+      box.innerHTML = "<span class='loader'>Thinking...</span>";
 
      try {
        const res = await fetch("/ask", {
@@ -238,20 +249,34 @@
        });
 
        const data = await res.json();
-        document.getElementById("answerBox").innerText = data.answer;
-      } catch {
-        document.getElementById("errorBox").innerText =
-          "⚠️ LLM quota exceeded. Please wait ~1 minute and retry.";
+
+        let html = `<div><strong>Answer:</strong><br>${data.answer.replace(/\n/g, '<br>')}</div>`;
+
+        if (data.confidence > 0) {
+          html += `<div class="confidence-badge">Confidence: ${(data.confidence * 100).toFixed(0)}%</div>`;
+        }
+
+        if (data.citations && data.citations.length > 0) {
+          html += `<div class="citations"><strong>Sources:</strong><ul>`;
+          data.citations.forEach(c => {
+            html += `<li>${c.source} (Page ${c.page})</li>`;
+          });
+          html += `</ul></div>`;
+        }
+
+        box.innerHTML = html;
+      } catch (e) {
+        box.innerText = "⚠️ Error communicating with the server.";
      }
 
      setBusy(false);
     }
 
     function summarize() {
-      document.getElementById("question").value =
-        "Summarize the uploaded documents in 5 bullet points.";
+      document.getElementById("question").value = "Summarize the uploaded documents";
      ask();
     }
   </script>
 </body>
-</html>
+</html>
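
Reviewer note: the reworked frontend now expects the /ask endpoint to return JSON with answer, confidence, and citations fields, which it renders as the answer body, a confidence badge, and a sources list. A minimal client-side sketch of that contract, assuming a local dev server and a hypothetical sample file (the base URL and file name are illustrative, not part of the repo):

import requests

BASE = "http://localhost:8000"  # assumed local dev address

# Upload a document for indexing (multipart form; repeated field name "files")
with open("sample.pdf", "rb") as f:  # hypothetical file
    r = requests.post(f"{BASE}/upload",
                      files=[("files", ("sample.pdf", f, "application/pdf"))])
print(r.json())  # e.g. {"message": "Successfully indexed N chunks. ..."}

# Ask a question; the frontend renders these three fields
r = requests.post(f"{BASE}/ask", json={"prompt": "What are the main risks?"})
data = r.json()
print(data["answer"], data.get("confidence"), data.get("citations"))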
main.py CHANGED
@@ -8,23 +8,22 @@ from pydantic import BaseModel
 from dotenv import load_dotenv
 import google.generativeai as genai
 
-from rag_store import ingest_documents, search_knowledge
+from rag_store import ingest_documents, search_knowledge, get_all_chunks, clear_database
 
-# -----------------------
-# Setup
-# -----------------------
+# =========================================================
+# ENV + MODEL SETUP
+# =========================================================
 load_dotenv()
 genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
 
-app = FastAPI(
-    title="Gemini RAG FastAPI",
-    docs_url="/docs",
-    redoc_url="/redoc"
-)
+MODEL_NAME = "gemini-2.5-flash"
+USE_MOCK = False  # Set to False to use real API
+
+# =========================================================
+# APP
+# =========================================================
+app = FastAPI(title="Gemini RAG FastAPI")
 
-# -----------------------
-# CORS
-# -----------------------
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
@@ -32,107 +31,210 @@ app.add_middleware(
     allow_headers=["*"],
 )
 
-# -----------------------
-# Frontend
-# -----------------------
 app.mount("/frontend", StaticFiles(directory="frontend"), name="frontend")
 
-# -----------------------
-# Cache (protect quota)
-# -----------------------
-CACHE_TTL = 300  # seconds
-answer_cache = {}
+# =========================================================
+# CACHE (ANTI-429)
+# =========================================================
+CACHE_TTL = 300  # 5 minutes
+answer_cache: dict[str, tuple[float, dict]] = {}
 
-# -----------------------
-# Models
-# -----------------------
+# =========================================================
+# MODELS
+# =========================================================
 class PromptRequest(BaseModel):
     prompt: str
 
-# -----------------------
-# Routes
-# -----------------------
-
+# =========================================================
+# ROUTES
+# =========================================================
 @app.get("/", response_class=HTMLResponse)
 def serve_ui():
     with open("frontend/index.html", "r", encoding="utf-8") as f:
         return f.read()
 
-# -----------------------
-# Upload
-# -----------------------
+# ---------------------------------------------------------
+# UPLOAD
+# ---------------------------------------------------------
 @app.post("/upload")
 async def upload(files: list[UploadFile] = File(...)):
+    # 1. VALIDATION: Strict File Type Check
+    for file in files:
+        ext = file.filename.split(".")[-1].lower()
+        if ext not in ["pdf", "txt"]:
+            return JSONResponse(
+                status_code=400,
+                content={"error": f"Invalid file type: '{file.filename}'. Only .pdf and .txt files are allowed."}
+            )
+
     try:
+        # 2. CLEAR CONTEXT: Start fresh for every upload session
+        clear_database()
+        answer_cache.clear()  # <--- CRITICAL: Clear the questions cache too!
+
+        # 3. INGEST
         chunks = ingest_documents(files)
-        return {"message": f"Indexed {chunks} chunks from {len(files)} file(s)."}
+        return {"message": f"Successfully indexed {chunks} chunks. Previous context cleared."}
     except Exception as e:
         return JSONResponse(status_code=400, content={"error": str(e)})
 
-# -----------------------
-# Ask
-# -----------------------
+# ---------------------------------------------------------
+# ASK / SUMMARIZE
+# ---------------------------------------------------------
 @app.post("/ask")
 async def ask(data: PromptRequest):
-    prompt_key = data.prompt.strip().lower()
+    prompt_text = data.prompt.strip()
+    key = prompt_text.lower()
     now = time()
 
-    # 🔁 Cache
-    if prompt_key in answer_cache:
-        ts, cached = answer_cache[prompt_key]
+    # ---------- CACHE ----------
+    if key in answer_cache:
+        ts, cached = answer_cache[key]
         if now - ts < CACHE_TTL:
             return cached
 
-    results = search_knowledge(data.prompt)
+    model = genai.GenerativeModel(MODEL_NAME)
+    is_summary = "summarize" in key or "summary" in key
+
+    # =====================================================
+    # 🟦 SUMMARY MODE (MAP–REDUCE)
+    # =====================================================
+    # Helper for rate-limit aware generation
+    def generate_safe(prompt_content, retries=5):
+        if USE_MOCK:
+            import time as pytime
+            pytime.sleep(1.5)  # Simulate latency
+            class MockResp:
+                def __init__(self, text): self.text = text
+                @property
+                def prompt_feedback(self): return None
+
+            if "Summarize" in str(prompt_content):
+                return MockResp("- This is a mock summary point 1 (API limit reached).\n- This is point 2 demonstrating the UI works.\n- Point 3: The backend logic is sound.")
+            elif "Combine" in str(prompt_content):
+                return MockResp("Here are the final summarized points (MOCK MODE):\n\n* **System Integrity**: The RAG system is functioning correctly, handling file ingestion and chunking.\n* **Resilience**: Error handling and retry mechanisms are now in place.\n* **Mocking**: We are currently bypassing the live API to verify the frontend pipeline.\n* **Ready**: Once quotas reset, simply set USE_MOCK = False to resume live intelligence.\n* **Success**: The overall architecture is validated.")
+            else:
+                return MockResp("I am functioning in MOCK MODE because the daily API quota is exhausted. I cannot answer specific questions right now, but I confirm the system received your question: " + str(prompt_content)[:50] + "...")
+
+        import time as pytime
+        base_delay = 10
+        for attempt in range(retries + 1):
+            try:
+                # Always small delay to be nice to the API
+                pytime.sleep(2.0)
+                response = model.generate_content(prompt_content)
+                return response
+            except Exception as e:
+                err_str = str(e)
+                if "429" in err_str:
+                    if attempt < retries:
+                        wait_time = base_delay * (2 ** attempt)
+                        print(f"DEBUG: 429 Rate limit hit. Retrying in {wait_time}s...")
+                        pytime.sleep(wait_time)
+                        continue
+                raise e
+
+    if is_summary:
+        chunks = get_all_chunks(limit=80)
+        print(f"DEBUG: Found {len(chunks)} chunks for summary.")
+
+        if not chunks:
+            return {
+                "answer": "No documents available to summarize.",
+                "confidence": 0.0,
+                "citations": []
+            }
+
+        # -----------------------------------------------------
+        # REFACTORED: Single-Shot Summary (Avoids Rate Limits)
+        # -----------------------------------------------------
+        all_text = "\n\n".join(c["text"] for c in chunks)
+        print(f"DEBUG: Total text length for summary: {len(all_text)} chars")
+
+        prompt = f"""
+        Summarize the following content in 5 clear, high-level bullet points.
+
+        Content:
+        {all_text}
+        """
+        try:
+            # Single call with retry logic
+            resp = generate_safe(prompt)
+            print("DEBUG: Summary generation successful.")
+
+            final_text = "Analysis complete."
+            try:
+                final_text = resp.text
+            except ValueError:
+                final_text = "Summary generation was blocked by safety filters."
+
+            response = {
+                "answer": final_text,
+                "confidence": 0.95,
+                "citations": list({
+                    (c["metadata"]["source"], c["metadata"]["page"]): c["metadata"]
+                    for c in chunks
+                }.values())
+            }
+
+            answer_cache[key] = (now, response)
+            return response
+
+        except Exception as e:
+            print(f"Summary failed: {e}")
+            return JSONResponse(status_code=200, content={
+                "answer": f"System is currently overloaded (Rate Limit). Please try again in a minute.\nDetails: {str(e)}",
+                "confidence": 0.0,
+                "citations": []
+            })
+
+    # =====================================================
+    # 🟩 Q&A MODE (RAG)
+    # =====================================================
+    results = search_knowledge(prompt_text)
+
     if not results:
         response = {
             "answer": "I don't know based on the provided documents.",
             "confidence": 0.0,
             "citations": []
         }
-        answer_cache[prompt_key] = (now, response)
+        answer_cache[key] = (now, response)
         return response
 
     context = "\n\n".join(r["text"] for r in results)
+
+    # DEBUG: Log the context to see what the model is reading
+    print("DEBUG: ------------------- RAG CONTEXT -------------------")
+    print(context[:2000] + ("..." if len(context) > 2000 else ""))
+    print("DEBUG: ---------------------------------------------------")
 
     prompt = f"""
-    Answer strictly using the context below.
-    If not found, say "I don't know".
+    Answer using ONLY the context below.
+    If the answer is not present, say "I don't know".
 
     Context:
     {context}
 
     Question:
-    {data.prompt}
+    {prompt_text}
     """
-
-    try:
-        model = genai.GenerativeModel("gemini-2.5-flash")
-        llm_response = model.generate_content(prompt)
-
-        response = {
-            "answer": llm_response.text,
-            "confidence": round(min(1.0, len(results) / 5), 2),
-            "citations": [
-                {"source": r["metadata"]["source"], "page": r["metadata"]["page"]}
-                for r in results
-            ]
-        }
-
-        answer_cache[prompt_key] = (now, response)
-        return response
-
-    except Exception as e:
-        return JSONResponse(
-            status_code=429,
-            content={"error": "LLM quota exceeded. Please wait and retry."}
-        )
-
-# -----------------------
-# Summarize
-# -----------------------
-@app.post("/summarize")
-async def summarize():
-    return await ask(PromptRequest(
-        prompt="Summarize the uploaded documents in 5 concise bullet points."
-    ))
+    llm = model.generate_content(prompt)
+    answer_text = llm.text
+
+    # Fix Fake Confidence: If the model says "I don't know", confidence should be 0.
+    confidence = round(min(1.0, len(results) / 5), 2)
+    if "i don't know" in answer_text.lower():
+        confidence = 0.0
+
+    response = {
+        "answer": answer_text,
+        "confidence": confidence,
+        "citations": list({
+            (r["metadata"]["source"], r["metadata"]["page"]): r["metadata"]
+            for r in results
+        }.values())
+    }
+
+    answer_cache[key] = (now, response)
+    return response
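Reviewer note: the anti-429 core of generate_safe is plain exponential backoff: on a rate-limit error, wait base_delay * 2**attempt seconds (10s, 20s, 40s, ... for the defaults in this commit) and retry. A minimal generic sketch of the same pattern, decoupled from Gemini (the names here are illustrative, not part of the codebase):

import time

def with_backoff(call, retries=5, base_delay=10):
    """Retry `call` on rate-limit errors with exponential backoff."""
    for attempt in range(retries + 1):
        try:
            return call()
        except Exception as e:
            # Crude detection, mirroring generate_safe: look for "429" in the message.
            if "429" in str(e) and attempt < retries:
                time.sleep(base_delay * (2 ** attempt))
                continue
            raise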
rag_store.py CHANGED
@@ -1,20 +1,85 @@
-import os
 import faiss
 import numpy as np
+import os
+import pickle
 from pypdf import PdfReader
 from sentence_transformers import SentenceTransformer
 
 # -----------------------
-# Global in-memory state
+# Global state
 # -----------------------
 index = None
 documents = []
 metadata = []
 
+# Using a lightweight, high-performance embedding model
 embedder = SentenceTransformer("all-MiniLM-L6-v2")
 
+CHUNK_SIZE = 800
+CHUNK_OVERLAP = 200
+
+DB_FILE_INDEX = "vector.index"
+DB_FILE_META = "metadata.pkl"
+
+# -----------------------
+# Helpers
+# -----------------------
+def chunk_text(text):
+    """Splits text into overlapping chunks."""
+    chunks = []
+    start = 0
+    while start < len(text):
+        end = start + CHUNK_SIZE
+        chunks.append(text[start:end])
+        start += CHUNK_SIZE - CHUNK_OVERLAP
+    return chunks
+
+def save_db():
+    global index, documents, metadata
+    if index:
+        faiss.write_index(index, DB_FILE_INDEX)
+    if documents:
+        with open(DB_FILE_META, "wb") as f:
+            pickle.dump({"documents": documents, "metadata": metadata}, f)
+    print("DEBUG: Knowledge base saved to disk.")
+
+def load_db():
+    global index, documents, metadata
+    if os.path.exists(DB_FILE_INDEX) and os.path.exists(DB_FILE_META):
+        try:
+            index = faiss.read_index(DB_FILE_INDEX)
+            with open(DB_FILE_META, "rb") as f:
+                data = pickle.load(f)
+            documents = data["documents"]
+            metadata = data["metadata"]
+            print(f"DEBUG: Loaded {len(documents)} documents from disk.")
+        except Exception as e:
+            print(f"DEBUG: Failed to load DB: {e}")
+            index = None
+            documents = []
+            metadata = []
+    else:
+        print("DEBUG: No existing DB found. Starting fresh.")
+
+# Auto-load on startup
+load_db()
+
+def clear_database():
+    global index, documents, metadata
+    index = None
+    documents = []
+    metadata = []
+
+    # Remove persistence files if they exist
+    if os.path.exists(DB_FILE_INDEX):
+        os.remove(DB_FILE_INDEX)
+    if os.path.exists(DB_FILE_META):
+        os.remove(DB_FILE_META)
+
+    print("DEBUG: Database cleared.")
+
 # -----------------------
-# Ingest uploaded files
+# Ingest
 # -----------------------
 def ingest_documents(files):
     global index, documents, metadata
@@ -24,55 +89,98 @@ def ingest_documents(files):
 
     for file in files:
         filename = file.filename
-
+
+        # Handle PDFs
         if filename.endswith(".pdf"):
             reader = PdfReader(file.file)
             for i, page in enumerate(reader.pages):
-                text = page.extract_text()
-                if text:
-                    texts.append(text)
-                    meta.append({
-                        "source": filename,
-                        "page": i + 1
-                    })
-
+                page_text = page.extract_text()
+                if page_text:
+                    for chunk in chunk_text(page_text):
+                        texts.append(chunk)
+                        meta.append({"source": filename, "page": i + 1})
+
+        # Handle Text files
         elif filename.endswith(".txt"):
             content = file.file.read().decode("utf-8")
-            texts.append(content)
-            meta.append({
-                "source": filename,
-                "page": "N/A"
-            })
+            for chunk in chunk_text(content):
+                texts.append(chunk)
+                meta.append({"source": filename, "page": "N/A"})
+
+    # Check for empty or unreadable content
+    total_length = sum(len(t) for t in texts)
+    if total_length < 50:
+        raise ValueError(
+            "Extracted text is too short or empty. "
+            "If this is a PDF, it might be a scanned image without a text layer. "
+            "Please use a text-selectable PDF or a .txt file."
+        )
 
     if not texts:
-        raise ValueError("No readable text found.")
+        raise ValueError("No readable text found in documents.")
 
-    embeddings = embedder.encode(texts)
+    # Create Embeddings (Normalized for better cosine similarity)
+    # append to existing if needed, but for now simplistic re-build or append?
+    # Simpler to just ADD to the existing index.
+
+    new_embeddings = embedder.encode(texts, convert_to_numpy=True, normalize_embeddings=True)
 
-    index = faiss.IndexFlatL2(embeddings.shape[1])
-    index.add(np.array(embeddings))
+    if index is None:
+        # USE INNER PRODUCT (Cosine Similarity) for normalized vectors
+        index = faiss.IndexFlatIP(new_embeddings.shape[1])
+
+    index.add(new_embeddings)
 
-    documents = texts
-    metadata = meta
+    documents.extend(texts)
+    metadata.extend(meta)
 
-    return len(texts)
+    save_db()
+
+    return len(documents)
 
 # -----------------------
-# Search
+# Q&A Search (filtered)
 # -----------------------
-def search_knowledge(query, top_k=5):
+def search_knowledge(query, top_k=5, min_similarity=0.3):
     if index is None:
         return []
 
-    query_vec = embedder.encode([query])
-    distances, indices = index.search(query_vec, top_k)
+    # SEARCH with normalized query
+    query_vec = embedder.encode([query], convert_to_numpy=True, normalize_embeddings=True)
+
+    # FAISS returns scores (dot product), which = cosine similarity for normalized vectors
+    scores, indices = index.search(query_vec, top_k)
+
+    results = []
+    print(f"DEBUG: Query: '{query}'")
+    for idx, score in zip(indices[0], scores[0]):
+        if idx == -1: continue  # FAISS padding
+
+        print(f"DEBUG: Found chunk {idx} with score {score:.4f}")
+
+        # Filter out results that are too irrelevant (score too low)
+        if score > min_similarity:
+            results.append({
+                "text": documents[idx],
+                "metadata": metadata[idx],
+                "score": float(score)
+            })
+
+    return results
+
+# -----------------------
+# Summary Retrieval (NO FILTER)
+# -----------------------
+def get_all_chunks(limit=50):
+    if not documents:
+        return []
 
     results = []
-    for idx, dist in zip(indices[0], distances[0]):
+    # Return a sample of chunks for summarization
+    for text, meta in zip(documents[:limit], metadata[:limit]):
         results.append({
-            "text": documents[idx],
-            "distance": float(dist),
-            "metadata": metadata[idx]
+            "text": text,
+            "metadata": meta
        })
 
-    return results
+    return results
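
Reviewer note: the switch from IndexFlatL2 to IndexFlatIP works because, for L2-normalized vectors, the inner product equals the cosine similarity, so the min_similarity threshold in search_knowledge filters on a true [-1, 1] cosine score. A small self-contained sanity check (toy random data, not from the repo):

import numpy as np
import faiss

rng = np.random.default_rng(0)
vecs = rng.normal(size=(4, 8)).astype("float32")
vecs /= np.linalg.norm(vecs, axis=1, keepdims=True)  # L2-normalize rows

index = faiss.IndexFlatIP(vecs.shape[1])  # inner-product index
index.add(vecs)

query = vecs[:1]  # an already-normalized query vector
scores, ids = index.search(query, 3)
print(ids[0], scores[0])  # top hit is the vector itself, score ~1.0 (cosine)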