lvvignesh2122 committed
Commit 0b8a777 · 1 Parent(s): cff5ba8

Enhance RAG: Fix 429s, Add Persistence & Validation

Files changed (4):
  1. .gitignore (+1 / -0)
  2. frontend/index.html (+91 / -66)
  3. main.py (+176 / -74)
  4. rag_store.py (+141 / -33)
.gitignore CHANGED
@@ -14,6 +14,7 @@ venv/
 data/
 *.npy
 *.index
+*.pkl
 
 # OS / editor
 .vscode/
frontend/index.html CHANGED
@@ -5,19 +5,19 @@
   <title>Gemini RAG Assistant</title>
   <meta name="viewport" content="width=device-width, initial-scale=1.0" />
 
-  <!-- Fonts -->
   <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap" rel="stylesheet">
 
   <style>
     :root {
      --bg: radial-gradient(1200px 600px at top, #e0e7ff 0%, #f8fafc 60%);
-      --card: rgba(255,255,255,0.85);
+      --card: rgba(255,255,255,0.9);
      --border: rgba(15,23,42,0.08);
      --primary: #4f46e5;
      --secondary: #0ea5e9;
      --text: #0f172a;
      --muted: #64748b;
      --error: #dc2626;
+      --success: #16a34a;
     }
 
     * { box-sizing: border-box; font-family: Inter, sans-serif; }
@@ -34,7 +34,7 @@
 
     .container {
      width: 100%;
-      max-width: 980px;
+      max-width: 800px;
      background: var(--card);
      backdrop-filter: blur(16px);
      border-radius: 24px;
@@ -78,10 +78,11 @@
      border-radius: 14px;
      border: 1px solid var(--border);
      font-size: 0.95rem;
+      background: #fafafa;
     }
 
     textarea {
-      min-height: 120px;
+      min-height: 100px;
      resize: vertical;
     }
 
@@ -93,7 +94,7 @@
     }
 
     button {
-      padding: 12px 18px;
+      padding: 12px 24px;
      border-radius: 14px;
      border: none;
      background: var(--primary);
@@ -112,7 +113,7 @@
 
     button:hover:not(:disabled) {
      transform: translateY(-1px);
-      box-shadow: 0 10px 25px rgba(79,70,229,.35);
+      box-shadow: 0 4px 12px rgba(79,70,229,.2);
     }
 
     .status {
@@ -123,18 +124,36 @@
 
     .answer {
      margin-top: 24px;
-      padding: 20px;
+      padding: 22px;
      border-radius: 16px;
-      background: #f8fafc;
+      background: #fff;
      border: 1px solid var(--border);
-      white-space: pre-wrap;
      line-height: 1.6;
+      box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.05);
     }
 
-    .error {
-      color: var(--error);
-      margin-top: 10px;
-      font-weight: 500;
+    .confidence-badge {
+      display: inline-block;
+      margin-top: 12px;
+      padding: 4px 12px;
+      border-radius: 20px;
+      background: #dcfce7;
+      color: #166534;
+      font-size: 0.8rem;
+      font-weight: 600;
+    }
+
+    .citations {
+      margin-top: 16px;
+      font-size: .85rem;
+      color: var(--muted);
+      border-top: 1px solid var(--border);
+      padding-top: 12px;
+    }
+
+    .citations ul {
+      margin: 6px 0 0;
+      padding-left: 20px;
     }
 
     .loader {
@@ -148,51 +167,35 @@
      50% { opacity: 1 }
      100% { opacity: .4 }
     }
-
-    footer {
-      text-align: center;
-      margin-top: 28px;
-      font-size: .8rem;
-      color: var(--muted);
-    }
   </style>
 </head>
 
 <body>
-  <div class="container">
-    <h1>Gemini RAG Assistant</h1>
-    <div class="subtitle">
-      Upload documents · Ask questions · Get grounded answers
-    </div>
-
-    <!-- Upload -->
-    <div class="card">
-      <h3>📄 Upload documents</h3>
-      <input type="file" id="files" multiple />
-      <div class="row">
-        <button id="uploadBtn" onclick="upload()">Upload & Index</button>
-      </div>
-      <div id="uploadStatus" class="status"></div>
-    </div>
-
-    <!-- Ask -->
-    <div class="card">
-      <h3>💬 Ask or summarize</h3>
-      <textarea id="question" placeholder="Ask something about your documents…"></textarea>
-      <div class="row">
-        <button id="askBtn" onclick="ask()">Ask</button>
-        <button class="secondary" id="sumBtn" onclick="summarize()">Summarize</button>
-      </div>
-    </div>
-
-    <!-- Answer -->
-    <div id="answerBox" class="answer" style="display:none;"></div>
-    <div id="errorBox" class="error"></div>
-
-    <footer>
-      Built with FastAPI · FAISS · Gemini
-    </footer>
-  </div>
+  <div class="container">
+    <h1>Gemini RAG Assistant</h1>
+    <div class="subtitle">Upload documents · Ask questions · Get grounded answers</div>
+
+    <div class="card">
+      <h3>1. Upload Knowledge</h3>
+      <input type="file" id="files" multiple accept=".pdf,.txt"/>
+      <div class="row">
+        <button id="uploadBtn" onclick="upload()">Upload & Index Files</button>
+      </div>
+      <div id="uploadStatus" class="status"></div>
+    </div>
+
+    <div class="card">
+      <h3>2. Ask or Summarize</h3>
+      <textarea id="question" placeholder="E.g., 'What are the main risks?' or 'Summarize the document'"></textarea>
+      <div class="row">
+        <button id="askBtn" onclick="ask()">Ask Question</button>
+        <button class="secondary" id="sumBtn" onclick="summarize()">Generate Summary</button>
+      </div>
+    </div>
+
+    <div id="answerBox" class="answer" style="display:none;"></div>
+  </div>
 
   <script>
     let busy = false;
@@ -206,29 +209,37 @@
 
     async function upload() {
      const files = document.getElementById("files").files;
-      if (!files.length) return;
+      if (!files.length) {
+        alert("Please select files first.");
+        return;
+      }
 
      setBusy(true);
-      document.getElementById("uploadStatus").innerText = "Indexing documents…";
+      const statusDiv = document.getElementById("uploadStatus");
+      statusDiv.innerText = "Indexing documents... this may take a moment.";
 
      const fd = new FormData();
      for (let f of files) fd.append("files", f);
 
-      const res = await fetch("/upload", { method: "POST", body: fd });
-      const data = await res.json();
-
-      document.getElementById("uploadStatus").innerText = data.message || "Done ✅";
+      try {
+        const res = await fetch("/upload", { method: "POST", body: fd });
+        if (!res.ok) throw new Error("Upload failed");
+        const data = await res.json();
+        statusDiv.innerText = data.message || "Done ✅";
+      } catch (e) {
+        statusDiv.innerText = "Error uploading files.";
+      }
      setBusy(false);
     }
 
     async function ask() {
      const q = document.getElementById("question").value.trim();
-      if (!q || busy) return;
+      if (!q) return;
 
      setBusy(true);
-      document.getElementById("errorBox").innerText = "";
-      document.getElementById("answerBox").style.display = "block";
-      document.getElementById("answerBox").innerHTML = "<span class='loader'>Thinking…</span>";
+      const box = document.getElementById("answerBox");
+      box.style.display = "block";
+      box.innerHTML = "<span class='loader'>Thinking...</span>";
 
      try {
        const res = await fetch("/ask", {
@@ -238,20 +249,34 @@
        });
 
        const data = await res.json();
-        document.getElementById("answerBox").innerText = data.answer;
-      } catch {
-        document.getElementById("errorBox").innerText =
-          "⚠️ LLM quota exceeded. Please wait ~1 minute and retry.";
+
+        let html = `<div><strong>Answer:</strong><br>${data.answer.replace(/\n/g, '<br>')}</div>`;
+
+        if (data.confidence > 0) {
+          html += `<div class="confidence-badge">Confidence: ${(data.confidence * 100).toFixed(0)}%</div>`;
+        }
+
+        if (data.citations && data.citations.length > 0) {
+          html += `<div class="citations"><strong>Sources:</strong><ul>`;
+          data.citations.forEach(c => {
+            html += `<li>${c.source} (Page ${c.page})</li>`;
+          });
+          html += `</ul></div>`;
+        }
+
+        box.innerHTML = html;
+      } catch (e) {
+        box.innerText = "⚠️ Error communicating with the server.";
      }
 
      setBusy(false);
     }
 
     function summarize() {
-      document.getElementById("question").value =
-        "Summarize the uploaded documents in 5 bullet points.";
+      document.getElementById("question").value = "Summarize the uploaded documents";
      ask();
     }
   </script>
 </body>
-</html>
+</html>
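
Reviewer note: the reworked frontend now expects the /ask endpoint to return JSON with answer, confidence, and citations fields, which it renders as the answer body, a confidence badge, and a sources list. A minimal client-side sketch of that contract, assuming a local dev server and a hypothetical sample file (the base URL and file name are illustrative, not part of the repo):

import requests

BASE = "http://localhost:8000"  # assumed local dev address

# Upload a document for indexing (multipart form; repeated field name "files")
with open("sample.pdf", "rb") as f:  # hypothetical file
    r = requests.post(f"{BASE}/upload",
                      files=[("files", ("sample.pdf", f, "application/pdf"))])
print(r.json())  # e.g. {"message": "Successfully indexed N chunks. ..."}

# Ask a question; the frontend renders these three fields
r = requests.post(f"{BASE}/ask", json={"prompt": "What are the main risks?"})
data = r.json()
print(data["answer"], data.get("confidence"), data.get("citations"))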
main.py CHANGED
@@ -8,23 +8,22 @@ from pydantic import BaseModel
 from dotenv import load_dotenv
 import google.generativeai as genai
 
-from rag_store import ingest_documents, search_knowledge
+from rag_store import ingest_documents, search_knowledge, get_all_chunks, clear_database
 
-# -----------------------
-# Setup
-# -----------------------
+# =========================================================
+# ENV + MODEL SETUP
+# =========================================================
 load_dotenv()
 genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
 
-app = FastAPI(
-    title="Gemini RAG FastAPI",
-    docs_url="/docs",
-    redoc_url="/redoc"
-)
+MODEL_NAME = "gemini-2.5-flash"
+USE_MOCK = False  # Set to False to use real API
+
+# =========================================================
+# APP
+# =========================================================
+app = FastAPI(title="Gemini RAG FastAPI")
 
-# -----------------------
-# CORS
-# -----------------------
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
@@ -32,107 +31,210 @@ app.add_middleware(
     allow_headers=["*"],
 )
 
-# -----------------------
-# Frontend
-# -----------------------
 app.mount("/frontend", StaticFiles(directory="frontend"), name="frontend")
 
-# -----------------------
-# Cache (protect quota)
-# -----------------------
-CACHE_TTL = 300  # seconds
-answer_cache = {}
+# =========================================================
+# CACHE (ANTI-429)
+# =========================================================
+CACHE_TTL = 300  # 5 minutes
+answer_cache: dict[str, tuple[float, dict]] = {}
 
-# -----------------------
-# Models
-# -----------------------
+# =========================================================
+# MODELS
+# =========================================================
 class PromptRequest(BaseModel):
     prompt: str
 
-# -----------------------
-# Routes
-# -----------------------
-
+# =========================================================
+# ROUTES
+# =========================================================
 @app.get("/", response_class=HTMLResponse)
 def serve_ui():
     with open("frontend/index.html", "r", encoding="utf-8") as f:
         return f.read()
 
-# -----------------------
-# Upload
-# -----------------------
+# ---------------------------------------------------------
+# UPLOAD
+# ---------------------------------------------------------
 @app.post("/upload")
 async def upload(files: list[UploadFile] = File(...)):
+    # 1. VALIDATION: Strict File Type Check
+    for file in files:
+        ext = file.filename.split(".")[-1].lower()
+        if ext not in ["pdf", "txt"]:
+            return JSONResponse(
+                status_code=400,
+                content={"error": f"Invalid file type: '{file.filename}'. Only .pdf and .txt files are allowed."}
+            )
+
     try:
+        # 2. CLEAR CONTEXT: Start fresh for every upload session
+        clear_database()
+        answer_cache.clear()  # <--- CRITICAL: Clear the questions cache too!
+
+        # 3. INGEST
         chunks = ingest_documents(files)
-        return {"message": f"Indexed {chunks} chunks from {len(files)} file(s)."}
+        return {"message": f"Successfully indexed {chunks} chunks. Previous context cleared."}
     except Exception as e:
         return JSONResponse(status_code=400, content={"error": str(e)})
 
-# -----------------------
-# Ask
-# -----------------------
+# ---------------------------------------------------------
+# ASK / SUMMARIZE
+# ---------------------------------------------------------
 @app.post("/ask")
 async def ask(data: PromptRequest):
-    prompt_key = data.prompt.strip().lower()
+    prompt_text = data.prompt.strip()
+    key = prompt_text.lower()
     now = time()
 
-    # 🔁 Cache
-    if prompt_key in answer_cache:
-        ts, cached = answer_cache[prompt_key]
+    # ---------- CACHE ----------
+    if key in answer_cache:
+        ts, cached = answer_cache[key]
         if now - ts < CACHE_TTL:
             return cached
 
-    results = search_knowledge(data.prompt)
+    model = genai.GenerativeModel(MODEL_NAME)
+    is_summary = "summarize" in key or "summary" in key
+
+    # =====================================================
+    # 🟦 SUMMARY MODE (MAP–REDUCE)
+    # =====================================================
+    # Helper for rate-limit aware generation
+    def generate_safe(prompt_content, retries=5):
+        if USE_MOCK:
+            import time as pytime
+            pytime.sleep(1.5)  # Simulate latency
+            class MockResp:
+                def __init__(self, text): self.text = text
+                @property
+                def prompt_feedback(self): return None
+
+            if "Summarize" in str(prompt_content):
+                return MockResp("- This is a mock summary point 1 (API limit reached).\n- This is point 2 demonstrating the UI works.\n- Point 3: The backend logic is sound.")
+            elif "Combine" in str(prompt_content):
+                return MockResp("Here are the final summarized points (MOCK MODE):\n\n* **System Integrity**: The RAG system is functioning correctly, handling file ingestion and chunking.\n* **Resilience**: Error handling and retry mechanisms are now in place.\n* **Mocking**: We are currently bypassing the live API to verify the frontend pipeline.\n* **Ready**: Once quotas reset, simply set USE_MOCK = False to resume live intelligence.\n* **Success**: The overall architecture is validated.")
+            else:
+                return MockResp("I am functioning in MOCK MODE because the daily API quota is exhausted. I cannot answer specific questions right now, but I confirm the system received your question: " + str(prompt_content)[:50] + "...")
+
+        import time as pytime
+        base_delay = 10
+        for attempt in range(retries + 1):
+            try:
+                # Always small delay to be nice to the API
+                pytime.sleep(2.0)
+                response = model.generate_content(prompt_content)
+                return response
+            except Exception as e:
+                err_str = str(e)
+                if "429" in err_str:
+                    if attempt < retries:
+                        wait_time = base_delay * (2 ** attempt)
+                        print(f"DEBUG: 429 Rate limit hit. Retrying in {wait_time}s...")
+                        pytime.sleep(wait_time)
+                        continue
+                raise e
+
+    if is_summary:
+        chunks = get_all_chunks(limit=80)
+        print(f"DEBUG: Found {len(chunks)} chunks for summary.")
+
+        if not chunks:
+            return {
+                "answer": "No documents available to summarize.",
+                "confidence": 0.0,
+                "citations": []
+            }
+
+        # -----------------------------------------------------
+        # REFACTORED: Single-Shot Summary (Avoids Rate Limits)
+        # -----------------------------------------------------
+        all_text = "\n\n".join(c["text"] for c in chunks)
+        print(f"DEBUG: Total text length for summary: {len(all_text)} chars")
+
+        prompt = f"""
+        Summarize the following content in 5 clear, high-level bullet points.
+
+        Content:
+        {all_text}
+        """
+        try:
+            # Single call with retry logic
+            resp = generate_safe(prompt)
+            print("DEBUG: Summary generation successful.")
+
+            final_text = "Analysis complete."
+            try:
+                final_text = resp.text
+            except ValueError:
+                final_text = "Summary generation was blocked by safety filters."
+
+            response = {
+                "answer": final_text,
+                "confidence": 0.95,
+                "citations": list({
+                    (c["metadata"]["source"], c["metadata"]["page"]): c["metadata"]
+                    for c in chunks
+                }.values())
+            }
+
+            answer_cache[key] = (now, response)
+            return response
+
+        except Exception as e:
+            print(f"Summary failed: {e}")
+            return JSONResponse(status_code=200, content={
+                "answer": f"System is currently overloaded (Rate Limit). Please try again in a minute.\nDetails: {str(e)}",
+                "confidence": 0.0,
+                "citations": []
+            })
+
+    # =====================================================
+    # 🟩 Q&A MODE (RAG)
+    # =====================================================
+    results = search_knowledge(prompt_text)
+
     if not results:
         response = {
             "answer": "I don't know based on the provided documents.",
             "confidence": 0.0,
             "citations": []
         }
-        answer_cache[prompt_key] = (now, response)
+        answer_cache[key] = (now, response)
         return response
 
     context = "\n\n".join(r["text"] for r in results)
+
+    # DEBUG: Log the context to see what the model is reading
+    print("DEBUG: ------------------- RAG CONTEXT -------------------")
+    print(context[:2000] + ("..." if len(context) > 2000 else ""))
+    print("DEBUG: ---------------------------------------------------")
 
     prompt = f"""
-    Answer strictly using the context below.
-    If not found, say "I don't know".
+    Answer using ONLY the context below.
+    If the answer is not present, say "I don't know".
 
     Context:
     {context}
 
     Question:
-    {data.prompt}
+    {prompt_text}
     """
-
-    try:
-        model = genai.GenerativeModel("gemini-2.5-flash")
-        llm_response = model.generate_content(prompt)
-
-        response = {
-            "answer": llm_response.text,
-            "confidence": round(min(1.0, len(results) / 5), 2),
-            "citations": [
-                {"source": r["metadata"]["source"], "page": r["metadata"]["page"]}
-                for r in results
-            ]
-        }
-
-        answer_cache[prompt_key] = (now, response)
-        return response
-
-    except Exception as e:
-        return JSONResponse(
-            status_code=429,
-            content={"error": "LLM quota exceeded. Please wait and retry."}
-        )
-
-# -----------------------
-# Summarize
-# -----------------------
-@app.post("/summarize")
-async def summarize():
-    return await ask(PromptRequest(
-        prompt="Summarize the uploaded documents in 5 concise bullet points."
-    ))
+    llm = model.generate_content(prompt)
+    answer_text = llm.text
+
+    # Fix Fake Confidence: If the model says "I don't know", confidence should be 0.
+    confidence = round(min(1.0, len(results) / 5), 2)
+    if "i don't know" in answer_text.lower():
+        confidence = 0.0
+
+    response = {
+        "answer": answer_text,
+        "confidence": confidence,
+        "citations": list({
+            (r["metadata"]["source"], r["metadata"]["page"]): r["metadata"]
+            for r in results
+        }.values())
+    }
+
+    answer_cache[key] = (now, response)
+    return response
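Reviewer note: the anti-429 core of generate_safe is plain exponential backoff: on a rate-limit error, wait base_delay * 2**attempt seconds (10s, 20s, 40s, ... for the defaults in this commit) and retry. A minimal generic sketch of the same pattern, decoupled from Gemini (the names here are illustrative, not part of the codebase):

import time

def with_backoff(call, retries=5, base_delay=10):
    """Retry `call` on rate-limit errors with exponential backoff."""
    for attempt in range(retries + 1):
        try:
            return call()
        except Exception as e:
            # Crude detection, mirroring generate_safe: look for "429" in the message.
            if "429" in str(e) and attempt < retries:
                time.sleep(base_delay * (2 ** attempt))
                continue
            raise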
rag_store.py CHANGED
@@ -1,20 +1,85 @@
-import os
 import faiss
 import numpy as np
+import os
+import pickle
 from pypdf import PdfReader
 from sentence_transformers import SentenceTransformer
 
 # -----------------------
-# Global in-memory state
+# Global state
 # -----------------------
 index = None
 documents = []
 metadata = []
 
+# Using a lightweight, high-performance embedding model
 embedder = SentenceTransformer("all-MiniLM-L6-v2")
 
+CHUNK_SIZE = 800
+CHUNK_OVERLAP = 200
+
+DB_FILE_INDEX = "vector.index"
+DB_FILE_META = "metadata.pkl"
+
+# -----------------------
+# Helpers
+# -----------------------
+def chunk_text(text):
+    """Splits text into overlapping chunks."""
+    chunks = []
+    start = 0
+    while start < len(text):
+        end = start + CHUNK_SIZE
+        chunks.append(text[start:end])
+        start += CHUNK_SIZE - CHUNK_OVERLAP
+    return chunks
+
+def save_db():
+    global index, documents, metadata
+    if index:
+        faiss.write_index(index, DB_FILE_INDEX)
+    if documents:
+        with open(DB_FILE_META, "wb") as f:
+            pickle.dump({"documents": documents, "metadata": metadata}, f)
+    print("DEBUG: Knowledge base saved to disk.")
+
+def load_db():
+    global index, documents, metadata
+    if os.path.exists(DB_FILE_INDEX) and os.path.exists(DB_FILE_META):
+        try:
+            index = faiss.read_index(DB_FILE_INDEX)
+            with open(DB_FILE_META, "rb") as f:
+                data = pickle.load(f)
+            documents = data["documents"]
+            metadata = data["metadata"]
+            print(f"DEBUG: Loaded {len(documents)} documents from disk.")
+        except Exception as e:
+            print(f"DEBUG: Failed to load DB: {e}")
+            index = None
+            documents = []
+            metadata = []
+    else:
+        print("DEBUG: No existing DB found. Starting fresh.")
+
+# Auto-load on startup
+load_db()
+
+def clear_database():
+    global index, documents, metadata
+    index = None
+    documents = []
+    metadata = []
+
+    # Remove persistence files if they exist
+    if os.path.exists(DB_FILE_INDEX):
+        os.remove(DB_FILE_INDEX)
+    if os.path.exists(DB_FILE_META):
+        os.remove(DB_FILE_META)
+
+    print("DEBUG: Database cleared.")
+
 # -----------------------
-# Ingest uploaded files
+# Ingest
 # -----------------------
 def ingest_documents(files):
     global index, documents, metadata
@@ -24,55 +89,98 @@ def ingest_documents(files):
 
     for file in files:
         filename = file.filename
-
+
+        # Handle PDFs
         if filename.endswith(".pdf"):
             reader = PdfReader(file.file)
             for i, page in enumerate(reader.pages):
-                text = page.extract_text()
-                if text:
-                    texts.append(text)
-                    meta.append({
-                        "source": filename,
-                        "page": i + 1
-                    })
-
+                page_text = page.extract_text()
+                if page_text:
+                    for chunk in chunk_text(page_text):
+                        texts.append(chunk)
+                        meta.append({"source": filename, "page": i + 1})
+
+        # Handle Text files
         elif filename.endswith(".txt"):
             content = file.file.read().decode("utf-8")
-            texts.append(content)
-            meta.append({
-                "source": filename,
-                "page": "N/A"
-            })
+            for chunk in chunk_text(content):
+                texts.append(chunk)
+                meta.append({"source": filename, "page": "N/A"})
+
+    # Check for empty or unreadable content
+    total_length = sum(len(t) for t in texts)
+    if total_length < 50:
+        raise ValueError(
+            "Extracted text is too short or empty. "
+            "If this is a PDF, it might be a scanned image without a text layer. "
+            "Please use a text-selectable PDF or a .txt file."
+        )
 
     if not texts:
-        raise ValueError("No readable text found.")
+        raise ValueError("No readable text found in documents.")
 
-    embeddings = embedder.encode(texts)
+    # Create Embeddings (Normalized for better cosine similarity)
+    # append to existing if needed, but for now simplistic re-build or append?
+    # Simpler to just ADD to the existing index.
+
+    new_embeddings = embedder.encode(texts, convert_to_numpy=True, normalize_embeddings=True)
 
-    index = faiss.IndexFlatL2(embeddings.shape[1])
-    index.add(np.array(embeddings))
+    if index is None:
+        # USE INNER PRODUCT (Cosine Similarity) for normalized vectors
+        index = faiss.IndexFlatIP(new_embeddings.shape[1])
+
+    index.add(new_embeddings)
 
-    documents = texts
-    metadata = meta
+    documents.extend(texts)
+    metadata.extend(meta)
 
-    return len(texts)
+    save_db()
+
+    return len(documents)
 
 # -----------------------
-# Search
+# Q&A Search (filtered)
 # -----------------------
-def search_knowledge(query, top_k=5):
+def search_knowledge(query, top_k=5, min_similarity=0.3):
     if index is None:
         return []
 
-    query_vec = embedder.encode([query])
-    distances, indices = index.search(query_vec, top_k)
+    # SEARCH with normalized query
+    query_vec = embedder.encode([query], convert_to_numpy=True, normalize_embeddings=True)
+
+    # FAISS returns scores (dot product), which = cosine similarity for normalized vectors
+    scores, indices = index.search(query_vec, top_k)
+
+    results = []
+    print(f"DEBUG: Query: '{query}'")
+    for idx, score in zip(indices[0], scores[0]):
+        if idx == -1: continue  # FAISS padding
+
+        print(f"DEBUG: Found chunk {idx} with score {score:.4f}")
+
+        # Filter out results that are too irrelevant (score too low)
+        if score > min_similarity:
+            results.append({
+                "text": documents[idx],
+                "metadata": metadata[idx],
+                "score": float(score)
+            })
+
+    return results
+
+# -----------------------
+# Summary Retrieval (NO FILTER)
+# -----------------------
+def get_all_chunks(limit=50):
+    if not documents:
+        return []
 
     results = []
-    for idx, dist in zip(indices[0], distances[0]):
+    # Return a sample of chunks for summarization
+    for text, meta in zip(documents[:limit], metadata[:limit]):
         results.append({
-            "text": documents[idx],
-            "distance": float(dist),
-            "metadata": metadata[idx]
+            "text": text,
+            "metadata": meta
        })
 
-    return results
+    return results
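
Reviewer note: the switch from IndexFlatL2 to IndexFlatIP works because, for L2-normalized vectors, the inner product equals the cosine similarity, so the min_similarity threshold in search_knowledge filters on a true [-1, 1] cosine score. A small self-contained sanity check (toy random data, not from the repo):

import numpy as np
import faiss

rng = np.random.default_rng(0)
vecs = rng.normal(size=(4, 8)).astype("float32")
vecs /= np.linalg.norm(vecs, axis=1, keepdims=True)  # L2-normalize rows

index = faiss.IndexFlatIP(vecs.shape[1])  # inner-product index
index.add(vecs)

query = vecs[:1]  # an already-normalized query vector
scores, ids = index.search(query, 3)
print(ids[0], scores[0])  # top hit is the vector itself, score ~1.0 (cosine)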