Commit 0b8a777
Parent: cff5ba8

Enhance RAG: Fix 429s, Add Persistence & Validation

Files changed:
- .gitignore           +1   -0
- frontend/index.html  +91  -66
- main.py              +176 -74
- rag_store.py         +141 -33
.gitignore CHANGED

@@ -14,6 +14,7 @@ venv/
 data/
 *.npy
 *.index
+*.pkl

 # OS / editor
 .vscode/
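(The new pattern covers metadata.pkl, which rag_store.py below now writes next to vector.index when persisting the knowledge base.)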
frontend/index.html CHANGED

@@ -5,19 +5,19 @@
     <title>Gemini RAG Assistant</title>
     <meta name="viewport" content="width=device-width, initial-scale=1.0" />

-    <!-- Fonts -->
     <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap" rel="stylesheet">

     <style>
       :root {
         --bg: radial-gradient(1200px 600px at top, #e0e7ff 0%, #f8fafc 60%);
-        --card: rgba(255,255,255,0.…
+        --card: rgba(255,255,255,0.9);
         --border: rgba(15,23,42,0.08);
         --primary: #4f46e5;
         --secondary: #0ea5e9;
         --text: #0f172a;
         --muted: #64748b;
         --error: #dc2626;
+        --success: #16a34a;
       }

       * { box-sizing: border-box; font-family: Inter, sans-serif; }

@@ -34,7 +34,7 @@

       .container {
         width: 100%;
-        max-width: …
+        max-width: 800px;
         background: var(--card);
         backdrop-filter: blur(16px);
         border-radius: 24px;

@@ -78,10 +78,11 @@
         border-radius: 14px;
         border: 1px solid var(--border);
         font-size: 0.95rem;
+        background: #fafafa;
       }

       textarea {
-        min-height: …
+        min-height: 100px;
         resize: vertical;
       }

@@ -93,7 +94,7 @@
       }

       button {
-        padding: 12px …
+        padding: 12px 24px;
         border-radius: 14px;
         border: none;
         background: var(--primary);

@@ -112,7 +113,7 @@

       button:hover:not(:disabled) {
         transform: translateY(-1px);
-        box-shadow: 0 …
+        box-shadow: 0 4px 12px rgba(79,70,229,.2);
       }

       .status {

@@ -123,18 +124,36 @@

       .answer {
         margin-top: 24px;
-        padding: …
+        padding: 22px;
         border-radius: 16px;
-        background: #…
+        background: #fff;
         border: 1px solid var(--border);
-        white-space: pre-wrap;
         line-height: 1.6;
+        box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.05);
       }

-      .…
-        margin-top: …
+      .confidence-badge {
+        display: inline-block;
+        margin-top: 12px;
+        padding: 4px 12px;
+        border-radius: 20px;
+        background: #dcfce7;
+        color: #166534;
+        font-size: 0.8rem;
+        font-weight: 600;
+      }
+
+      .citations {
+        margin-top: 16px;
+        font-size: .85rem;
+        color: var(--muted);
+        border-top: 1px solid var(--border);
+        padding-top: 12px;
+      }
+
+      .citations ul {
+        margin: 6px 0 0;
+        padding-left: 20px;
       }

       .loader {

@@ -148,51 +167,35 @@
         50% { opacity: 1 }
         100% { opacity: .4 }
       }
-
-      footer {
-        text-align: center;
-        margin-top: 28px;
-        font-size: .8rem;
-        color: var(--muted);
-      }
     </style>
   </head>

   <body>
-    <div class="…
-    <…
-    <input type="file" id="files" multiple />
-    <div class="row">
-      <button id="uploadBtn" onclick="upload()">Upload & Index</button>
-    </div>
-    <div id="uploadStatus" class="status"></div>
+    <div class="container">
+      <h1>Gemini RAG Assistant</h1>
+      <div class="subtitle">Upload documents · Ask questions · Get grounded answers</div>
+
+      <div class="card">
+        <h3>1. Upload Knowledge</h3>
+        <input type="file" id="files" multiple accept=".pdf,.txt"/>
+        <div class="row">
+          <button id="uploadBtn" onclick="upload()">Upload & Index Files</button>
         </div>
+        <div id="uploadStatus" class="status"></div>
+      </div>

-    <…
-    <…
-      <button class="secondary" id="sumBtn" onclick="summarize()">Summarize</button>
-    </div>
+      <div class="card">
+        <h3>2. Ask or Summarize</h3>
+        <textarea id="question" placeholder="E.g., 'What are the main risks?' or 'Summarize the document'"></textarea>
+        <div class="row">
+          <button id="askBtn" onclick="ask()">Ask Question</button>
+          <button class="secondary" id="sumBtn" onclick="summarize()">Generate Summary</button>
         </div>
+      </div>

-    <div id="answerBox" class="answer" style="display:none;"></div>
-    <div id="errorBox" class="error"></div>
+      <div id="answerBox" class="answer" style="display:none;"></div>

-    Built with FastAPI · FAISS · Gemini
-    </footer>
-    </div>
+    </div>

     <script>
       let busy = false;

@@ -206,29 +209,37 @@

       async function upload() {
         const files = document.getElementById("files").files;
-        if (!files.length) …
+        if (!files.length) {
+          alert("Please select files first.");
+          return;
+        }

         setBusy(true);
-        document.getElementById("uploadStatus")…
+        const statusDiv = document.getElementById("uploadStatus");
+        statusDiv.innerText = "Indexing documents... this may take a moment.";

         const fd = new FormData();
         for (let f of files) fd.append("files", f);

-        …
+        try {
+          const res = await fetch("/upload", { method: "POST", body: fd });
+          if (!res.ok) throw new Error("Upload failed");
+          const data = await res.json();
+          statusDiv.innerText = data.message || "Done ✅";
+        } catch (e) {
+          statusDiv.innerText = "Error uploading files.";
+        }
         setBusy(false);
       }

       async function ask() {
         const q = document.getElementById("question").value.trim();
-        if (!q…
+        if (!q) return;

         setBusy(true);
-        document.getElementById("…
+        const box = document.getElementById("answerBox");
+        box.style.display = "block";
+        box.innerHTML = "<span class='loader'>Thinking...</span>";

         try {
           const res = await fetch("/ask", {

@@ -238,20 +249,34 @@
           });

           const data = await res.json();
-          …
+
+          let html = `<div><strong>Answer:</strong><br>${data.answer.replace(/\n/g, '<br>')}</div>`;
+
+          if (data.confidence > 0) {
+            html += `<div class="confidence-badge">Confidence: ${(data.confidence * 100).toFixed(0)}%</div>`;
+          }
+
+          if (data.citations && data.citations.length > 0) {
+            html += `<div class="citations"><strong>Sources:</strong><ul>`;
+            data.citations.forEach(c => {
+              html += `<li>${c.source} (Page ${c.page})</li>`;
+            });
+            html += `</ul></div>`;
+          }
+
+          box.innerHTML = html;
+
+        } catch (e) {
+          box.innerText = "⚠️ Error communicating with the server.";
         }

         setBusy(false);
       }

       function summarize() {
-        document.getElementById("question").value =
-          "Summarize the uploaded documents in 5 bullet points.";
+        document.getElementById("question").value = "Summarize the uploaded documents";
         ask();
       }
     </script>
   </body>
-</html>
+</html>
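For reference, the new ask() rendering logic consumes a JSON payload of the shape below. The field names come straight from the diff; the concrete values here are illustrative only.

# Illustrative /ask response, as consumed by the frontend above.
example_response = {
    "answer": "The report lists three main risks...",
    "confidence": 0.8,                        # shown as "Confidence: 80%" when > 0
    "citations": [
        {"source": "report.pdf", "page": 3},  # rendered as "report.pdf (Page 3)"
    ],
}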
main.py CHANGED

@@ -8,23 +8,22 @@ from pydantic import BaseModel
 from dotenv import load_dotenv
 import google.generativeai as genai

-from rag_store import ingest_documents, search_knowledge
+from rag_store import ingest_documents, search_knowledge, get_all_chunks, clear_database

+# =========================================================
+# ENV + MODEL SETUP
+# =========================================================
 load_dotenv()
 genai.configure(api_key=os.getenv("GEMINI_API_KEY"))

+MODEL_NAME = "gemini-2.5-flash"
+USE_MOCK = False  # Set to True to bypass the live API with canned responses
+
+# =========================================================
+# APP
+# =========================================================
+app = FastAPI(title="Gemini RAG FastAPI")

-# -----------------------
-# CORS
-# -----------------------
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],

@@ -32,107 +31,210 @@ app.add_middleware(
     allow_headers=["*"],
 )

-# -----------------------
-# Frontend
-# -----------------------
 app.mount("/frontend", StaticFiles(directory="frontend"), name="frontend")

-# …
-CACHE_TTL = 300  # …
-answer_cache = {}
+# =========================================================
+# CACHE (ANTI-429)
+# =========================================================
+CACHE_TTL = 300  # 5 minutes
+answer_cache: dict[str, tuple[float, dict]] = {}

-# …
+# =========================================================
+# MODELS
+# =========================================================
 class PromptRequest(BaseModel):
     prompt: str

-# …
+# =========================================================
+# ROUTES
+# =========================================================
 @app.get("/", response_class=HTMLResponse)
 def serve_ui():
     with open("frontend/index.html", "r", encoding="utf-8") as f:
         return f.read()

-# …
+# ---------------------------------------------------------
+# UPLOAD
+# ---------------------------------------------------------
 @app.post("/upload")
 async def upload(files: list[UploadFile] = File(...)):
+    # 1. VALIDATION: Strict File Type Check
+    for file in files:
+        ext = file.filename.split(".")[-1].lower()
+        if ext not in ["pdf", "txt"]:
+            return JSONResponse(
+                status_code=400,
+                content={"error": f"Invalid file type: '{file.filename}'. Only .pdf and .txt files are allowed."}
+            )
+
     try:
+        # 2. CLEAR CONTEXT: Start fresh for every upload session
+        clear_database()
+        answer_cache.clear()  # <--- CRITICAL: Clear the questions cache too!
+
+        # 3. INGEST
         chunks = ingest_documents(files)
-        return {"message": f"…
+        return {"message": f"Successfully indexed {chunks} chunks. Previous context cleared."}
     except Exception as e:
         return JSONResponse(status_code=400, content={"error": str(e)})

-# …
+# ---------------------------------------------------------
+# ASK / SUMMARIZE
+# ---------------------------------------------------------
 @app.post("/ask")
 async def ask(data: PromptRequest):
+    prompt_text = data.prompt.strip()
+    key = prompt_text.lower()
     now = time()

-    # …
-    if …
-        ts, cached = answer_cache[…
+    # ---------- CACHE ----------
+    if key in answer_cache:
+        ts, cached = answer_cache[key]
         if now - ts < CACHE_TTL:
             return cached

+    model = genai.GenerativeModel(MODEL_NAME)
+    is_summary = "summarize" in key or "summary" in key
+
+    # =====================================================
+    # 🟦 SUMMARY MODE (single-shot with retry)
+    # =====================================================
+    # Helper for rate-limit-aware generation
+    def generate_safe(prompt_content, retries=5):
+        if USE_MOCK:
+            import time as pytime
+            pytime.sleep(1.5)  # Simulate latency
+
+            class MockResp:
+                def __init__(self, text): self.text = text
+                @property
+                def prompt_feedback(self): return None
+
+            if "Summarize" in str(prompt_content):
+                return MockResp("- This is a mock summary point 1 (API limit reached).\n- This is point 2 demonstrating the UI works.\n- Point 3: The backend logic is sound.")
+            elif "Combine" in str(prompt_content):
+                return MockResp("Here are the final summarized points (MOCK MODE):\n\n* **System Integrity**: The RAG system is functioning correctly, handling file ingestion and chunking.\n* **Resilience**: Error handling and retry mechanisms are now in place.\n* **Mocking**: We are currently bypassing the live API to verify the frontend pipeline.\n* **Ready**: Once quotas reset, simply set USE_MOCK = False to resume live intelligence.\n* **Success**: The overall architecture is validated.")
+            else:
+                return MockResp("I am functioning in MOCK MODE because the daily API quota is exhausted. I cannot answer specific questions right now, but I confirm the system received your question: " + str(prompt_content)[:50] + "...")
+
+        import time as pytime
+        base_delay = 10
+        for attempt in range(retries + 1):
+            try:
+                # Always a small delay to be nice to the API
+                pytime.sleep(2.0)
+                response = model.generate_content(prompt_content)
+                return response
+            except Exception as e:
+                err_str = str(e)
+                if "429" in err_str:
+                    if attempt < retries:
+                        wait_time = base_delay * (2 ** attempt)
+                        print(f"DEBUG: 429 Rate limit hit. Retrying in {wait_time}s...")
+                        pytime.sleep(wait_time)
+                        continue
+                raise e
+
+    if is_summary:
+        chunks = get_all_chunks(limit=80)
+        print(f"DEBUG: Found {len(chunks)} chunks for summary.")
+
+        if not chunks:
+            return {
+                "answer": "No documents available to summarize.",
+                "confidence": 0.0,
+                "citations": []
+            }
+
+        # -----------------------------------------------------
+        # REFACTORED: Single-Shot Summary (Avoids Rate Limits)
+        # -----------------------------------------------------
+        all_text = "\n\n".join(c["text"] for c in chunks)
+        print(f"DEBUG: Total text length for summary: {len(all_text)} chars")
+
+        prompt = f"""
+        Summarize the following content in 5 clear, high-level bullet points.
+
+        Content:
+        {all_text}
+        """
+        try:
+            # Single call with retry logic
+            resp = generate_safe(prompt)
+            print("DEBUG: Summary generation successful.")
+
+            final_text = "Analysis complete."
+            try:
+                final_text = resp.text
+            except ValueError:
+                final_text = "Summary generation was blocked by safety filters."
+
+            response = {
+                "answer": final_text,
+                "confidence": 0.95,
+                "citations": list({
+                    (c["metadata"]["source"], c["metadata"]["page"]): c["metadata"]
+                    for c in chunks
+                }.values())
+            }
+
+            answer_cache[key] = (now, response)
+            return response
+
+        except Exception as e:
+            print(f"Summary failed: {e}")
+            return JSONResponse(status_code=200, content={
+                "answer": f"System is currently overloaded (Rate Limit). Please try again in a minute.\nDetails: {str(e)}",
+                "confidence": 0.0,
+                "citations": []
+            })
+
+    # =====================================================
+    # 🟩 Q&A MODE (RAG)
+    # =====================================================
+    results = search_knowledge(prompt_text)
+
     if not results:
         response = {
             "answer": "I don't know based on the provided documents.",
             "confidence": 0.0,
             "citations": []
         }
-        answer_cache[…
+        answer_cache[key] = (now, response)
         return response

     context = "\n\n".join(r["text"] for r in results)
+
+    # DEBUG: Log the context to see what the model is reading
+    print("DEBUG: ------------------- RAG CONTEXT -------------------")
+    print(context[:2000] + ("..." if len(context) > 2000 else ""))
+    print("DEBUG: ---------------------------------------------------")

     prompt = f"""
-    Answer …
-    If not …
+    Answer using ONLY the context below.
+    If the answer is not present, say "I don't know".

     Context:
     {context}

     Question:
-    {…
+    {prompt_text}
     """
-    …
-            status_code=429,
-            content={"error": "LLM quota exceeded. Please wait and retry."}
-        )
-
-# -----------------------
-# Summarize
-# -----------------------
-@app.post("/summarize")
-async def summarize():
-    return await ask(PromptRequest(
-        prompt="Summarize the uploaded documents in 5 concise bullet points."
-    ))
+    llm = model.generate_content(prompt)
+    answer_text = llm.text
+
+    # Fix Fake Confidence: If the model says "I don't know", confidence should be 0.
+    confidence = round(min(1.0, len(results) / 5), 2)
+    if "i don't know" in answer_text.lower():
+        confidence = 0.0
+
+    response = {
+        "answer": answer_text,
+        "confidence": confidence,
+        "citations": list({
+            (r["metadata"]["source"], r["metadata"]["page"]): r["metadata"]
+            for r in results
+        }.values())
+    }
+
+    answer_cache[key] = (now, response)
+    return response
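To exercise the new endpoints by hand, a sketch along these lines should work, assuming the app is served locally on port 8000 and the requests library is installed (both are assumptions, not part of the commit). Note that on repeated 429s, generate_safe backs off for 10, 20, 40, 80, then 160 seconds before re-raising, on top of a flat 2-second pause before every call.

# Hypothetical smoke test for the new /upload and /ask routes.
import requests

BASE = "http://localhost:8000"  # assumed local dev address

# Upload a .txt file; any other extension is rejected with a 400 by the new validation.
with open("notes.txt", "rb") as f:  # hypothetical sample file
    r = requests.post(f"{BASE}/upload", files=[("files", ("notes.txt", f, "text/plain"))])
print(r.json())  # e.g. {"message": "Successfully indexed N chunks. Previous context cleared."}

# Ask a question; repeating the same prompt within 5 minutes is served from answer_cache.
r = requests.post(f"{BASE}/ask", json={"prompt": "What are the main risks?"})
print(r.json()["answer"])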
rag_store.py CHANGED

@@ -1,20 +1,85 @@
-import os
 import faiss
 import numpy as np
+import os
+import pickle
 from pypdf import PdfReader
 from sentence_transformers import SentenceTransformer

 # -----------------------
-# Global
+# Global state
 # -----------------------
 index = None
 documents = []
 metadata = []

+# Using a lightweight, high-performance embedding model
 embedder = SentenceTransformer("all-MiniLM-L6-v2")

+CHUNK_SIZE = 800
+CHUNK_OVERLAP = 200
+
+DB_FILE_INDEX = "vector.index"
+DB_FILE_META = "metadata.pkl"
+
+# -----------------------
+# Helpers
+# -----------------------
+def chunk_text(text):
+    """Splits text into overlapping chunks."""
+    chunks = []
+    start = 0
+    while start < len(text):
+        end = start + CHUNK_SIZE
+        chunks.append(text[start:end])
+        start += CHUNK_SIZE - CHUNK_OVERLAP
+    return chunks
+
+def save_db():
+    global index, documents, metadata
+    if index:
+        faiss.write_index(index, DB_FILE_INDEX)
+    if documents:
+        with open(DB_FILE_META, "wb") as f:
+            pickle.dump({"documents": documents, "metadata": metadata}, f)
+    print("DEBUG: Knowledge base saved to disk.")
+
+def load_db():
+    global index, documents, metadata
+    if os.path.exists(DB_FILE_INDEX) and os.path.exists(DB_FILE_META):
+        try:
+            index = faiss.read_index(DB_FILE_INDEX)
+            with open(DB_FILE_META, "rb") as f:
+                data = pickle.load(f)
+            documents = data["documents"]
+            metadata = data["metadata"]
+            print(f"DEBUG: Loaded {len(documents)} documents from disk.")
+        except Exception as e:
+            print(f"DEBUG: Failed to load DB: {e}")
+            index = None
+            documents = []
+            metadata = []
+    else:
+        print("DEBUG: No existing DB found. Starting fresh.")
+
+# Auto-load on startup
+load_db()
+
+def clear_database():
+    global index, documents, metadata
+    index = None
+    documents = []
+    metadata = []
+
+    # Remove persistence files if they exist
+    if os.path.exists(DB_FILE_INDEX):
+        os.remove(DB_FILE_INDEX)
+    if os.path.exists(DB_FILE_META):
+        os.remove(DB_FILE_META)
+
+    print("DEBUG: Database cleared.")
+
 # -----------------------
 # Ingest
 # -----------------------
 def ingest_documents(files):
     global index, documents, metadata

@@ -24,55 +89,98 @@ def ingest_documents(files):

     for file in files:
         filename = file.filename
-        …
+
+        # Handle PDFs
         if filename.endswith(".pdf"):
             reader = PdfReader(file.file)
             for i, page in enumerate(reader.pages):
-                …
-                if …
-                    …
-                        "source": filename,
-                    …
+                page_text = page.extract_text()
+                if page_text:
+                    for chunk in chunk_text(page_text):
+                        texts.append(chunk)
+                        meta.append({"source": filename, "page": i + 1})
+
+        # Handle text files
         elif filename.endswith(".txt"):
             content = file.file.read().decode("utf-8")
-            …
-                "source": filename,
-            …
+            for chunk in chunk_text(content):
+                texts.append(chunk)
+                meta.append({"source": filename, "page": "N/A"})
+
+    # Check for empty or unreadable content
+    total_length = sum(len(t) for t in texts)
+    if total_length < 50:
+        raise ValueError(
+            "Extracted text is too short or empty. "
+            "If this is a PDF, it might be a scanned image without a text layer. "
+            "Please use a text-selectable PDF or a .txt file."
+        )

     if not texts:
-        raise ValueError("No readable text found.")
+        raise ValueError("No readable text found in documents.")

-    …
+    # Create embeddings, normalized so inner product equals cosine similarity,
+    # and ADD them to the existing index rather than rebuilding it.
+    new_embeddings = embedder.encode(texts, convert_to_numpy=True, normalize_embeddings=True)

-    index …
+    if index is None:
+        # Use INNER PRODUCT (cosine similarity) for normalized vectors
+        index = faiss.IndexFlatIP(new_embeddings.shape[1])
+
+    index.add(new_embeddings)

-    documents …
-    metadata …
+    documents.extend(texts)
+    metadata.extend(meta)

+    save_db()
+
+    return len(documents)

 # -----------------------
-# Search
+# Q&A Search (filtered)
 # -----------------------
-def search_knowledge(query, top_k=5):
+def search_knowledge(query, top_k=5, min_similarity=0.3):
     if index is None:
         return []

-    …
+    # Search with a normalized query vector
+    query_vec = embedder.encode([query], convert_to_numpy=True, normalize_embeddings=True)
+
+    # FAISS returns dot-product scores, which equal cosine similarity for normalized vectors
+    scores, indices = index.search(query_vec, top_k)

     results = []
-    …
-        results.append({
-            "text": …
-            "…
-            "metadata": metadata[idx]
-        })
+    print(f"DEBUG: Query: '{query}'")
+    for idx, score in zip(indices[0], scores[0]):
+        if idx == -1: continue  # FAISS padding
+
+        print(f"DEBUG: Found chunk {idx} with score {score:.4f}")
+
+        # Filter out results that are too irrelevant (score too low)
+        if score > min_similarity:
+            results.append({
+                "text": documents[idx],
+                "metadata": metadata[idx],
+                "score": float(score)
+            })

     return results
+
+# -----------------------
+# Summary Retrieval (no filter)
+# -----------------------
+def get_all_chunks(limit=50):
+    if not documents:
+        return []
+
+    results = []
+    # Return a sample of chunks for summarization
+    for text, meta in zip(documents[:limit], metadata[:limit]):
+        results.append({
+            "text": text,
+            "metadata": meta
+        })
+
+    return results
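As a sanity check on the new chunking, here is the overlap arithmetic, using the constants from the diff above:

# chunk_text steps forward by CHUNK_SIZE - CHUNK_OVERLAP = 600 characters,
# so consecutive 800-character chunks share a 200-character overlap.
CHUNK_SIZE, CHUNK_OVERLAP = 800, 200

text = "x" * 2000
starts = range(0, len(text), CHUNK_SIZE - CHUNK_OVERLAP)  # 0, 600, 1200, 1800
chunks = [text[s:s + CHUNK_SIZE] for s in starts]
print([len(c) for c in chunks])  # [800, 800, 800, 200]

The same normalization trick explains the switch to IndexFlatIP: with unit-length embeddings, inner product equals cosine similarity, so min_similarity=0.3 acts as a cosine-similarity threshold.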