lvvignesh2122 committed
Commit 5bf211e · 1 Parent(s): be6b61f

feat: implement multi-agent RAG with self-correction and web search


Added Supervisor-Worker architecture, Skeptical Verifier node, and Tavily API integration for external grounding.
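For orientation, the routing this change wires up (see the graph builder in agentic_rag_v2_graph.py below) can be sketched as follows. This is a stubbed illustration, not the committed code: node bodies are placeholders and the state schema is trimmed to the routing key.

from typing import TypedDict
from langgraph.graph import StateGraph, END

class RoutingState(TypedDict):
    query: str
    next_node: str

# Placeholder workers; the real implementations live in agentic_rag_v2_graph.py below.
def supervisor(state): return {"next_node": "responder"}
def research_pdf(state): return {}
def research_web(state): return {}
def verifier(state): return {}
def responder(state): return {"next_node": "end"}

g = StateGraph(RoutingState)
for name, fn in [("supervisor", supervisor), ("research_pdf", research_pdf),
                 ("research_web", research_web), ("verifier", verifier),
                 ("responder", responder)]:
    g.add_node(name, fn)

g.set_entry_point("supervisor")
g.add_conditional_edges("supervisor", lambda s: s["next_node"],
                        {"research_pdf": "research_pdf",
                         "research_web": "research_web",
                         "responder": "responder"})
g.add_edge("research_pdf", "supervisor")  # PDF findings return to the supervisor
g.add_edge("research_web", "verifier")    # web findings are cross-checked first
g.add_edge("verifier", "supervisor")
g.add_conditional_edges("responder",
                        lambda s: "supervisor" if s["next_node"] == "supervisor" else "end",
                        {"supervisor": "supervisor", "end": END})
app = g.compile()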

Files changed (3)
  1. agentic_rag_v2_graph.py  +205 -288
  2. eval_logger.py  +4 -2
  3. main.py  +87 -21
agentic_rag_v2_graph.py CHANGED
@@ -1,296 +1,230 @@
- from typing import TypedDict, List, Optional, Annotated
  import google.generativeai as genai
  from langgraph.graph import StateGraph, END
  from langgraph.checkpoint.memory import MemorySaver
  from langgraph.graph.message import add_messages
- from langchain_core.messages import BaseMessage, HumanMessage, AIMessage
- import time
- import random

  from rag_store import search_knowledge
  from eval_logger import log_eval
  from llm_utils import generate_with_retry

  MODEL_NAME = "gemini-2.5-flash"
  MAX_RETRIES = 2

-
-
-
- def format_history(messages: List[BaseMessage]) -> str:
-     history_str = ""
-     for msg in messages:
-         role = "User" if isinstance(msg, HumanMessage) else "Assistant"
-         history_str += f"{role}: {msg.content}\n"
-     return history_str
-
  # ===============================
  # STATE
  # ===============================
  class AgentState(TypedDict):
      messages: Annotated[List[BaseMessage], add_messages]
      query: str
-     refined_query: str
-     decision: str
-     retrieved_chunks: List[dict]
-     retrieval_quality: str
      retries: int
-     answer: Optional[str]
-     confidence: float
-     answer_known: bool
-

  # ===============================
- # LLM DECISION NODE (PLANNER)
  # ===============================
- def llm_decision_node(state: AgentState) -> AgentState:
-     history = format_history(state.get("messages", []))
-     prompt = f"""
- You are an AI agent deciding whether a question requires document retrieval.
- Answer ONLY one word:
- - use_rag
- - no_rag
-
- Conversation History:
- {history}
-
- Current Question:
- {state["query"]}
- """
-     model = genai.GenerativeModel(MODEL_NAME)
-     resp = generate_with_retry(model, prompt)
-
-     decision = "use_rag"
-     if resp and "no_rag" in resp.text.lower():
-         decision = "no_rag"
-
-     return {**state, "decision": decision}

  # ===============================
- # RETRIEVAL NODE (TOOL)
  # ===============================
- def retrieve_node(state: AgentState) -> AgentState:
-     q = state["refined_query"] or state["query"]
-     chunks = search_knowledge(q)
-     return {**state, "retrieved_chunks": chunks}
-

- # ===============================
- # GRADE DOCUMENTS NODE (GRADER)
- # ===============================
- def grade_documents_node(state: AgentState) -> AgentState:
-     """
-     Determines whether the retrieved documents are relevant to the question.
-     """
      query = state["query"]
-     retrieved_docs = state["retrieved_chunks"]

-     filtered_docs = []
-     for doc in retrieved_docs:
-         prompt = f"""
- You are a grader assessing relevance of a retrieved document to a user question.
-
- Retrieved document:
- {doc['text']}
-
- User question:
- {query}
-
- If the document contains keyword(s) or semantic meaning related to the user question, grade it as relevant.
- Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question.
-
- Answer ONLY 'yes' or 'no'.
- """
-         model = genai.GenerativeModel(MODEL_NAME)
-         resp = generate_with_retry(model, prompt)
-         score = resp.text.strip().lower() if resp else "no"
-
-         if "yes" in score:
-             filtered_docs.append(doc)
-
-     return {**state, "retrieved_chunks": filtered_docs}
-
-
- # ===============================
- # RETRIEVAL EVALUATION (CRITIC)
- # ===============================
- def evaluate_retrieval_node(state: AgentState) -> AgentState:
-     if not state["retrieved_chunks"]:
-         return {**state, "retrieval_quality": "bad"}
-
-     context_sample = "\n".join(c["text"][:200] for c in state["retrieved_chunks"][:3])
-
      prompt = f"""
- Evaluate whether the following retrieved context is sufficient
- to answer the question.
-
- Answer ONLY one word:
- - good
- - bad
-
- Question:
- {state["query"]}
-
- Context:
- {context_sample}
- """
-
-     model = genai.GenerativeModel(MODEL_NAME)
-     resp = generate_with_retry(model, prompt)
-
-     quality = "bad"
-     if resp and "good" in resp.text.lower():
-         quality = "good"

-     return {**state, "retrieval_quality": quality}
-
-
- # ===============================
- # QUERY REFINEMENT (SELF-CORRECTION)
- # ===============================
- def refine_query_node(state: AgentState) -> AgentState:
-     history = format_history(state.get("messages", []))
-     prompt = f"""
- Rewrite the following question to improve document retrieval.
- Be concise and factual.
-
- Conversation History:
- {history}
-
- Original question:
- {state["query"]}
- """
-
      model = genai.GenerativeModel(MODEL_NAME)
      resp = generate_with_retry(model, prompt)

-     refined = resp.text.strip() if resp else state["query"]
-
-     return {
-         **state,
-         "refined_query": refined,
-         "retries": state["retries"] + 1
-     }
-

- # ===============================
- # ANSWER WITH RAG (HIGH CONF)
- # ===============================
- def answer_with_rag_node(state: AgentState) -> AgentState:
-     context = "\n\n".join(c["text"] for c in state["retrieved_chunks"])
-     history = format_history(state.get("messages", []))

      prompt = f"""
- Answer using ONLY the context below.
- If the answer is not present, say "I don't know".
-
- Context:
- {context}
-
- Conversation History:
- {history}
-
- Question:
- {state["query"]}
- """
-
      model = genai.GenerativeModel(MODEL_NAME)
      resp = generate_with_retry(model, prompt)
-     answer_text = resp.text if resp else "Error generating answer due to quota limits."
-
-     answer_known = "i don't know" not in answer_text.lower()
-     confidence = min(0.95, 0.6 + (0.1 * len(state["retrieved_chunks"])))
-
-     log_eval(
-         query=state["query"],
-         retrieved_count=len(state["retrieved_chunks"]),
-         confidence=confidence,
-         answer_known=answer_known
-     )
-
-     # Append interaction to memory
-     new_messages = [
-         HumanMessage(content=state["query"]),
-         AIMessage(content=answer_text)
-     ]
-
-     return {
-         **state,
-         "messages": new_messages,
-         "answer": answer_text,
-         "confidence": confidence,
-         "answer_known": answer_known
-     }
-

- # ===============================
- # ANSWER WITHOUT RAG
- # ===============================
- def answer_direct_node(state: AgentState) -> AgentState:
-     history = format_history(state.get("messages", []))

      prompt = f"""
- Conversation History:
- {history}
-
- Answer clearly and concisely:
- {state['query']}
- """
-
      model = genai.GenerativeModel(MODEL_NAME)
      resp = generate_with_retry(model, prompt)
-     answer_text = resp.text if resp else "Error generating answer due to quota limits."
-
-     log_eval(
-         query=state["query"],
-         retrieved_count=0,
-         confidence=0.4,
-         answer_known=True
-     )
-
-     # Append interaction to memory
-     new_messages = [
-         HumanMessage(content=state["query"]),
-         AIMessage(content=answer_text)
-     ]
-
-     return {
-         **state,
-         "messages": new_messages,
-         "answer": answer_text,
-         "confidence": 0.4,
-         "answer_known": True
-     }
-
-
- # ===============================
- # NO ANSWER
- # ===============================
- def no_answer_node(state: AgentState) -> AgentState:
-     log_eval(
-         query=state["query"],
-         retrieved_count=0,
-         confidence=0.0,
-         answer_known=False
-     )
-
-     answer_text = "I don't know based on the provided documents."

-     # Append interaction to memory
-     new_messages = [
-         HumanMessage(content=state["query"]),
-         AIMessage(content=answer_text)
-     ]
-
      return {
-         **state,
-         "messages": new_messages,
-         "answer": answer_text,
-         "confidence": 0.0,
-         "answer_known": False
      }

-
  # ===============================
  # GRAPH BUILDER
  # ===============================
@@ -298,57 +232,40 @@ def build_agentic_rag_v2_graph():
      graph = StateGraph(AgentState)
      memory = MemorySaver()

-     graph.add_node("decide", llm_decision_node)
-     graph.add_node("retrieve", retrieve_node)
-     graph.add_node("grade", grade_documents_node)
-     graph.add_node("evaluate", evaluate_retrieval_node)
-     graph.add_node("refine", refine_query_node)
-     graph.add_node("answer_rag", answer_with_rag_node)
-     graph.add_node("answer_direct", answer_direct_node)
-     graph.add_node("no_answer", no_answer_node)

-     graph.set_entry_point("decide")

      graph.add_conditional_edges(
-         "decide",
-         lambda s: s["decision"],
          {
-             "use_rag": "retrieve",
-             "no_rag": "answer_direct"
          }
      )
-
-     graph.add_edge("retrieve", "grade")
-
-     def check_relevance(state):
-         if not state["retrieved_chunks"]:
-             if state["retries"] >= MAX_RETRIES:
-                 return "no_answer"
-             return "rewrite"
-         return "evaluate"
-
-     graph.add_conditional_edges(
-         "grade",
-         check_relevance,
-         {
-             "rewrite": "refine",
-             "evaluate": "evaluate",
-             "no_answer": "no_answer"
-         }
-     )
-
      graph.add_conditional_edges(
-         "evaluate",
-         lambda s: "retry" if s["retrieval_quality"] == "bad" and s["retries"] < MAX_RETRIES else "answer",
          {
-             "retry": "refine",
-             "answer": "answer_rag"
          }
      )

-     graph.add_edge("refine", "retrieve")
-     graph.add_edge("answer_rag", END)
-     graph.add_edge("answer_direct", END)
-     graph.add_edge("no_answer", END)
-
      return graph.compile(checkpointer=memory)
 
+ import os
+ import time
+ from typing import TypedDict, List, Optional, Annotated, Literal
  import google.generativeai as genai
  from langgraph.graph import StateGraph, END
  from langgraph.checkpoint.memory import MemorySaver
  from langgraph.graph.message import add_messages
+ from langchain_core.messages import BaseMessage, HumanMessage, AIMessage, SystemMessage
+ from tavily import TavilyClient

  from rag_store import search_knowledge
  from eval_logger import log_eval
  from llm_utils import generate_with_retry

+ # Config
  MODEL_NAME = "gemini-2.5-flash"
  MAX_RETRIES = 2

  # ===============================
  # STATE
  # ===============================
  class AgentState(TypedDict):
      messages: Annotated[List[BaseMessage], add_messages]
      query: str
+     final_answer: str
+
+     # Internal routing & scratchpad
+     next_node: str
+     current_tool: str
+     tool_outputs: List[dict]  # list of {source: 'pdf'|'web', content: ..., score: ...}
+     verification_notes: str
      retries: int

  # ===============================
+ # TOOLS
  # ===============================
+ def pdf_search_tool(query: str):
+     """Searches internal PDF knowledge base."""
+     results = search_knowledge(query, top_k=4)
+     # Format for consumption
+     return [
+         {
+             "source": "internal_pdf",
+             "content": r["text"],
+             "metadata": r["metadata"],
+             "score": r.get("score", 0)
+         }
+         for r in results
+     ]

+ def web_search_tool(query: str):
+     """Searches the web using Tavily."""
+     api_key = os.getenv("TAVILY_API_KEY")
+     if not api_key:
+         return [{"source": "external_web", "content": "Error: TAVILY_API_KEY not found.", "score": 0}]
+
+     try:
+         tavily = TavilyClient(api_key=api_key)
+         # Search context first for cleaner text
+         context = tavily.get_search_context(query=query, search_depth="advanced")
+         return [{
+             "source": "external_web",
+             "content": context,
+             "score": 0.8  # Arbitrary confidence for web
+         }]
+     except Exception as e:
+         return [{"source": "external_web", "content": f"Web search error: {str(e)}", "score": 0}]

  # ===============================
+ # NODES
  # ===============================

+ # 1. SUPERVISOR
+ def supervisor_node(state: AgentState):
+     """Decides whether to research (and which tool) or answer."""
      query = state["query"]
+     history_len = len(state.get("messages", []))
+
+     # If we already have tools output, check if we need more or are done
+     tools_out = state.get("tool_outputs", [])

      prompt = f"""
+ You are a Supervisor Agent.
+ User Query: "{query}"
+
+ Current Gathered Info Count: {len(tools_out)}
+
+ Decide next step:
+ 1. "research_pdf": If we haven't checked internal docs yet.
+ 2. "research_web": If PDF info is missing/insufficient and we haven't checked web yet.
+ 3. "responder": If we have enough info OR we have tried everything.
+
+ Return ONLY one of: research_pdf, research_web, responder
+ """
+
+     # Simple heuristic to save calls, or use LLM?
+     # Prompt says "Planning Node: The LLM must decide".
+
+     # We can force PDF first to be efficient
+     has_pdf = any(t["source"] == "internal_pdf" for t in tools_out)
+     if not has_pdf:
+         return {**state, "next_node": "research_pdf"}

      model = genai.GenerativeModel(MODEL_NAME)
      resp = generate_with_retry(model, prompt)
+     decision = resp.text.strip().lower() if resp else "responder"

+     if "pdf" in decision: return {**state, "next_node": "research_pdf"}
+     if "web" in decision: return {**state, "next_node": "research_web"}
+
+     return {**state, "next_node": "responder"}

+ # 2. RESEARCHER (PDF)
+ def researcher_pdf_node(state: AgentState):
+     query = state["query"]
+     results = pdf_search_tool(query)
+
+     # Append to tool_outputs
+     current_outputs = state.get("tool_outputs", []) + results
+
+     # Log
+     log_eval(query, len(results), 0.9, len(results) > 0, source_type="internal_pdf")
+
+     return {**state, "tool_outputs": current_outputs}

+ # 3. RESEARCHER (WEB)
+ def researcher_web_node(state: AgentState):
+     query = state["query"]
+     results = web_search_tool(query)
+
+     current_outputs = state.get("tool_outputs", []) + results
+
+     # Log
+     log_eval(query, 1, 0.7, True, source_type="external_web")
+
+     return {**state, "tool_outputs": current_outputs}
+
+ # 4. VERIFIER
+ def verifier_node(state: AgentState):
+     """Cross-references Web findings against PDF context."""
+     tool_outputs = state.get("tool_outputs", [])
+     web_content = [t for t in tool_outputs if t["source"] == "external_web"]
+     pdf_content = [t for t in tool_outputs if t["source"] == "internal_pdf"]
+
+     if not web_content:
+         return state  # Nothing to verify
+
+     # If we skipped PDF for some reason, let's quick-check it now for verification context
+     if not pdf_content:
+         pdf_content = pdf_search_tool(state["query"])
+
+     web_text = "\n".join([c["content"] for c in web_content])
+     pdf_text = "\n".join([c["content"] for c in pdf_content])
+
      prompt = f"""
+ You are a Skeptical Verifier.
+
+ Query: {state["query"]}
+
+ INTERNAL PDF KNOWLEDGE:
+ {pdf_text[:2000]}
+
+ EXTERNAL WEB FINDINGS:
+ {web_text[:2000]}
+
+ Task:
+ Check if the External Web Findings contradict the Internal PDF Knowledge.
+ If Web says 'X' and PDF says 'Y', report the conflict.
+
+ Output a brief "Verification Note". If no conflict, say "No conflict".
+ """
+
      model = genai.GenerativeModel(MODEL_NAME)
      resp = generate_with_retry(model, prompt)
+     note = resp.text.strip() if resp else "Verification failed."
+
+     current_notes = state.get("verification_notes", "")
+     new_notes = f"{current_notes}\n[Verification]: {note}"
+
+     return {**state, "verification_notes": new_notes}

+ # 5. RESPONDER
+ def responder_node(state: AgentState):
+     query = state["query"]
+     tools_out = state.get("tool_outputs", [])
+     notes = state.get("verification_notes", "")
+
+     # Check if we found nothing
+     if not tools_out and state["retries"] < 1:
+         # Self-correction: Rewrite
+         prompt = f"Rewrite this query to be more specific: {query}"
+         model = genai.GenerativeModel(MODEL_NAME)
+         resp = generate_with_retry(model, prompt)
+         new_query = resp.text.strip() if resp else query
+         return {**state, "query": new_query, "retries": state["retries"] + 1, "next_node": "supervisor"}  # Loop back
+
+     context = ""
+     for t in tools_out:
+         context += f"\n[{t['source'].upper()}]: {t['content'][:500]}..."
+
      prompt = f"""
+ You are the Final Responder.
+ User Query: {query}
+
+ Gathered Info:
+ {context}
+
+ Verification Notes (Conflicts?):
+ {notes}
+
+ Instructions:
+ 1. Answer the user query based on gathered info.
+ 2. If there are conflicts (e.g. PDF vs Web), explicitly mention them and trust PDF more but note the Web claim.
+ 3. Cite sources (Internal PDF vs External Web).
+ """
+
      model = genai.GenerativeModel(MODEL_NAME)
      resp = generate_with_retry(model, prompt)
+     answer = resp.text if resp else "I could not generate an answer."

      return {
+         **state,
+         "final_answer": answer,
+         "messages": [AIMessage(content=answer)],
+         "next_node": "end"
      }

  # ===============================
  # GRAPH BUILDER
  # ===============================
      graph = StateGraph(AgentState)
      memory = MemorySaver()

+     graph.add_node("supervisor", supervisor_node)
+     graph.add_node("research_pdf", researcher_pdf_node)
+     graph.add_node("research_web", researcher_web_node)
+     graph.add_node("verifier", verifier_node)
+     graph.add_node("responder", responder_node)

+     graph.set_entry_point("supervisor")

+     # Routing
      graph.add_conditional_edges(
+         "supervisor",
+         lambda s: s["next_node"],
          {
+             "research_pdf": "research_pdf",
+             "research_web": "research_web",
+             "responder": "responder"
          }
      )
+
+     # Research PDF -> Supervisor (to decide if Web is needed)
+     graph.add_edge("research_pdf", "supervisor")
+
+     # Research Web -> Verifier -> Supervisor
+     graph.add_edge("research_web", "verifier")
+     graph.add_edge("verifier", "supervisor")
+
+     # Responder -> Maybe loop back if self-correction triggered?
      graph.add_conditional_edges(
+         "responder",
+         lambda s: "supervisor" if s["next_node"] == "supervisor" else "end",
          {
+             "supervisor": "supervisor",
+             "end": END
          }
      )

      return graph.compile(checkpointer=memory)
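
A minimal invocation sketch for the compiled graph above (illustrative, not part of the commit; it assumes genai has already been configured with GEMINI_API_KEY, that TAVILY_API_KEY is set, and mirrors the initial state main.py builds below):

from agentic_rag_v2_graph import build_agentic_rag_v2_graph

agentic_graph = build_agentic_rag_v2_graph()

initial_state = {
    "messages": [],
    "query": "What does the handbook say about remote work?",  # hypothetical query
    "final_answer": "",
    "next_node": "",
    "current_tool": "",
    "tool_outputs": [],
    "verification_notes": "",
    "retries": 0,
}

# The MemorySaver checkpointer keys conversation state by thread_id.
result = agentic_graph.invoke(
    initial_state,
    config={"configurable": {"thread_id": "demo-thread"}},
)

print(result["final_answer"])
print("sources used:", sorted({t["source"] for t in result.get("tool_outputs", [])}))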
eval_logger.py CHANGED
@@ -7,14 +7,16 @@ def log_eval(
      query: str,
      retrieved_count: int,
      confidence: float,
-     answer_known: bool
  ):
      record = {
          "timestamp": time(),
          "query": query,
          "retrieved_count": retrieved_count,
          "confidence": confidence,
-         "answer_known": answer_known
      }

      with open(LOG_FILE, "a", encoding="utf-8") as f:

      query: str,
      retrieved_count: int,
      confidence: float,
+     answer_known: bool,
+     source_type: str = "internal_pdf"  # Added source_type
  ):
      record = {
          "timestamp": time(),
          "query": query,
          "retrieved_count": retrieved_count,
          "confidence": confidence,
+         "answer_known": answer_known,
+         "source_type": source_type
      }

      with open(LOG_FILE, "a", encoding="utf-8") as f:
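
With the new keyword, a call from one of the researcher nodes and the JSON line it appends look roughly like this (a sketch; the exact LOG_FILE path is defined elsewhere in eval_logger.py and is not shown in this diff):

from eval_logger import log_eval

log_eval(
    query="example question",      # hypothetical query
    retrieved_count=4,
    confidence=0.9,
    answer_known=True,
    source_type="internal_pdf",    # defaults to "internal_pdf" if omitted
)
# Appends one JSON object per call, roughly:
# {"timestamp": 1700000000.0, "query": "example question", "retrieved_count": 4,
#  "confidence": 0.9, "answer_known": true, "source_type": "internal_pdf"}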
main.py CHANGED
@@ -8,10 +8,11 @@ from pydantic import BaseModel
  from dotenv import load_dotenv
  import google.generativeai as genai

- from rag_store import ingest_documents, get_all_chunks, clear_database
  from analytics import get_analytics
  from agentic_rag_v2_graph import build_agentic_rag_v2_graph
  from llm_utils import generate_with_retry

  # =========================================================
  # ENV + MODEL
@@ -19,6 +20,7 @@ from llm_utils import generate_with_retry
  load_dotenv()
  genai.configure(api_key=os.getenv("GEMINI_API_KEY"))

  MODEL_NAME = "gemini-2.5-flash"
  MAX_FILE_SIZE = 50 * 1024 * 1024
  CACHE_TTL = 300
@@ -115,6 +117,49 @@ async def ask(data: PromptRequest):
      if now - ts < CACHE_TTL:
          return cached

      # ==========================
      # 🟦 SUMMARY (BYPASS AGENT)
      # ==========================
@@ -142,27 +187,48 @@ async def ask(data: PromptRequest):
      # ==========================
      # 🟩 AGENTIC RAG (LLM + EVALUATION)
      # ==========================
-     result = agentic_graph.invoke({
          "messages": [],
          "query": query,
-         "refined_query": "",
-         "decision": "",
-         "retrieved_chunks": [],
-         "retrieval_quality": "",
-         "retries": 0,
-         "answer": None,
-         "confidence": 0.0,
-         "answer_known": False
-     }, config={"configurable": {"thread_id": data.thread_id}})
-
-     response = {
-         "answer": result["answer"],
-         "confidence": result["confidence"],
-         "citations": list({
-             (c["metadata"]["source"], c["metadata"]["page"]): c["metadata"]
-             for c in result.get("retrieved_chunks", [])
-         }.values())
      }

-     answer_cache[key] = (now, response)
-     return response

  from dotenv import load_dotenv
  import google.generativeai as genai

+ from rag_store import ingest_documents, get_all_chunks, clear_database, search_knowledge
  from analytics import get_analytics
  from agentic_rag_v2_graph import build_agentic_rag_v2_graph
  from llm_utils import generate_with_retry
+ import asyncio

  # =========================================================
  # ENV + MODEL

  load_dotenv()
  genai.configure(api_key=os.getenv("GEMINI_API_KEY"))

+ MOCK_MODE = False  # Refactor complete - enabling real agent
  MODEL_NAME = "gemini-2.5-flash"
  MAX_FILE_SIZE = 50 * 1024 * 1024
  CACHE_TTL = 300

      if now - ts < CACHE_TTL:
          return cached

+     # ==========================
+     # 🟧 MOCK MODE (NO API)
+     # ==========================
+     if MOCK_MODE:
+         await asyncio.sleep(0.5)  # Simulate latency
+
+         mock_answer = ""
+         mock_citations = []
+
+         if "summary" in key or "summarize" in key:
+             # Local summary mock
+             chunks = get_all_chunks(limit=3)
+             mock_answer = "⚠️ **MOCK SUMMARY** ⚠️\n\n(API Quota Exhausted - Showing direct database content)\n\n"
+             for c in chunks:
+                 # Show full text to avoid breaking markdown tables
+                 mock_answer += f"### Chunk from {c['metadata']['source']}\n{c['text']}\n\n---\n\n"
+         else:
+             # Local retrieval mock
+             retrieved = search_knowledge(query)
+             mock_answer = "⚠️ **MOCK RESPONSE** ⚠️\n\n(API Quota Exhausted)\n\nI found the following relevant information in your documents using local search (Exact text match):\n\n"
+
+             seen_sources = set()
+             for r in retrieved:
+                 # Add citation
+                 meta = r["metadata"]
+                 if (meta["source"], meta["page"]) not in seen_sources:
+                     mock_citations.append(meta)
+                     seen_sources.add((meta["source"], meta["page"]))
+
+                 # Show full text of the relevant chunk
+                 mock_answer += f"> **Source: {meta['source']}**\n\n{r['text']}\n\n---\n"
+
+             if not retrieved:
+                 mock_answer += "No relevant documents found in the local index."
+
+         response = {
+             "answer": mock_answer,
+             "confidence": 0.85,
+             "citations": mock_citations
+         }
+         answer_cache[key] = (now, response)
+         return response
+
      # ==========================
      # 🟦 SUMMARY (BYPASS AGENT)
      # ==========================

      # ==========================
      # 🟩 AGENTIC RAG (LLM + EVALUATION)
      # ==========================
+     # ==========================
+     # 🟩 AGENTIC RAG (MULTI-TOOL SUPERVISOR)
+     # ==========================
+     # Initialize state for new graph
+     initial_state = {
          "messages": [],
          "query": query,
+         "final_answer": "",
+         "next_node": "",
+         "current_tool": "",
+         "tool_outputs": [],
+         "verification_notes": "",
+         "retries": 0
      }
+
+     try:
+         result = agentic_graph.invoke(initial_state, config={"configurable": {"thread_id": data.thread_id}})
+
+         # Extract citations from tool outputs
+         citations = []
+         seen = set()
+         for t in result.get("tool_outputs", []):
+             src = t.get("source", "unknown")
+             # If it's a PDF, it has metadata
+             if src == "internal_pdf":
+                 meta = t.get("metadata", {})
+                 key_ = (meta.get("source"), meta.get("page"))
+                 if key_ not in seen:
+                     citations.append(meta)
+                     seen.add(key_)
+             # If it's Web, just cite the source
+             elif src == "external_web":
+                 citations.append({"source": "Tavily Web Search", "page": "Web"})
+
+         response = {
+             "answer": result.get("final_answer", "No answer produced."),
+             "confidence": 0.9 if result.get("tool_outputs") else 0.1,
+             "citations": citations
+         }
+
+         answer_cache[key] = (now, response)
+         return response

+     except Exception as e:
+         return JSONResponse(status_code=500, content={"error": f"Agent execution failed: {str(e)}"})
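
End-to-end, a client call against the updated endpoint might look like the sketch below. The route decorator and the PromptRequest fields other than thread_id are not visible in this diff, so the "/ask" path and the "prompt" field name are assumptions.

import requests  # hypothetical client; any HTTP client works

resp = requests.post(
    "http://localhost:8000/ask",  # assumed route for async def ask(...)
    json={
        "prompt": "Compare the PDF spec with the latest web docs.",  # assumed field name
        "thread_id": "demo-thread",  # keys the MemorySaver checkpointer across turns
    },
)
payload = resp.json()
print(payload["answer"])       # final_answer from the supervisor graph
print(payload["confidence"])   # 0.9 if any tool output was gathered, else 0.1
print(payload["citations"])    # PDF metadata and/or {"source": "Tavily Web Search", "page": "Web"}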