Kalpokoch committed (verified)
Commit f13ef99 · 1 Parent(s): 4dd3f6e

Update app/app.py

Files changed (1)
app/app.py  +99 βˆ’96
app/app.py CHANGED
@@ -3,27 +3,29 @@ import json
 import asyncio
 import logging
 import uuid
-import re
 from fastapi import FastAPI, HTTPException, Request
 from pydantic import BaseModel
-from typing import Optional
+from typing import Optional, Dict
 from llama_cpp import Llama
-from typing import Optional, Dict, List
 
 # Correctly reference the module within the 'app' package
 from app.policy_vector_db import PolicyVectorDB, ensure_db_populated
 
+
 # -----------------------------
 # ✅ Logging Configuration
 # -----------------------------
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - [%(request_id)s] - %(message)s')
 
+
 class RequestIdAdapter(logging.LoggerAdapter):
     def process(self, msg, kwargs):
-        return '[%s] %s' % (self.extra['request_id'], msg), kwargs
+        return '[%s] %s' % (self.extra.get('request_id', 'N/A'), msg), kwargs
+
 
 logger = logging.getLogger("app")
 
+
 # -----------------------------
 # ✅ Configuration
 # -----------------------------
@@ -34,11 +36,14 @@ LLM_TIMEOUT_SECONDS = int(os.getenv("LLM_TIMEOUT_SECONDS", "90"))
 RELEVANCE_THRESHOLD = float(os.getenv("RELEVANCE_THRESHOLD", "0.3"))
 TOP_K_SEARCH = int(os.getenv("TOP_K_SEARCH", "3"))
 TOP_K_CONTEXT = int(os.getenv("TOP_K_CONTEXT", "1"))
+LLM_THREADS = int(os.getenv("LLM_THREADS", "4"))  # configurable threads
+
 
 # -----------------------------
 # ✅ Initialize FastAPI App
 # -----------------------------
-app = FastAPI(title="NEEPCO DoP RAG Chatbot", version="2.1.0")
+app = FastAPI(title="NEEPCO DoP RAG Chatbot", version="2.1.1")
+
 
 @app.middleware("http")
 async def add_request_id(request: Request, call_next):
@@ -48,6 +53,7 @@ async def add_request_id(request: Request, call_next):
     response.headers["X-Request-ID"] = request_id
     return response
 
+
 # -----------------------------
 # ✅ Vector DB and Data Initialization
 # -----------------------------
@@ -71,19 +77,20 @@ except Exception as e:
     db = None
     db_ready = False
 
+
 # -----------------------------
-# ✅ Load TinyLlama GGUF Model with Improved Settings
+# ✅ Load TinyLlama GGUF Model with Safer Generation
 # -----------------------------
 logger.info(f"Loading GGUF model from: {MODEL_PATH}")
 try:
     llm = Llama(
         model_path=MODEL_PATH,
         n_ctx=2048,
-        n_threads=2,    # Increased threads for better performance
-        n_batch=256,    # Reduced batch size for stability
+        n_threads=LLM_THREADS,
+        n_batch=256,
         use_mlock=True,
         verbose=False,
-        seed=42         # Added seed for reproducible results
+        seed=42
     )
     logger.info("GGUF model loaded successfully.")
     model_ready = True
@@ -92,18 +99,21 @@ except Exception as e:
     llm = None
     model_ready = False
 
+
 # -----------------------------
 # ✅ API Schemas
 # -----------------------------
 class Query(BaseModel):
     question: str
 
+
 class AdvancedQuery(BaseModel):
     question: str
     section_filter: Optional[str] = None
     chunk_type_filter: Optional[str] = None
     top_k: Optional[int] = None
 
+
 class Feedback(BaseModel):
     request_id: str
     question: str
@@ -112,20 +122,22 @@ class Feedback(BaseModel):
     feedback: str
     comment: str | None = None
 
+
 # -----------------------------
 # ✅ Helper Functions
 # -----------------------------
 def get_logger_adapter(request: Request):
     return RequestIdAdapter(logger, {'request_id': getattr(request.state, 'request_id', 'N/A')})
 
+
 def get_chunk_priority(chunk: Dict) -> int:
     """Assign priority to different chunk types for better context selection"""
     priority_order = [
-        'approval_authority',
-        'delegation_summary',
-        'requirement',
-        'method_specific',
-        'board_approval',
+        'approval_authority',
+        'delegation_summary',
+        'requirement',
+        'method_specific',
+        'board_approval',
         'financial_concurrence',
         'composition'
     ]
@@ -135,120 +147,103 @@ def get_chunk_priority(chunk: Dict) -> int:
     except ValueError:
         return len(priority_order)  # Lower priority for unknown types
 
+
 def detect_filters(question_lower: str) -> tuple:
     """Detect section and chunk type filters from user question"""
     section_filter = None
     chunk_type_filter = None
-
+
     # Section keyword mapping
     section_keywords = {
         "annexure": "Annexure A",
-        "financial concurrence": "Financial Concurrence",
+        "financial concurrence": "Financial Concurrence",
         "guidelines": "Guidelines",
         "section 1": "I", "section i": "I",
         "section 2": "II", "section ii": "II",
-        "section 3": "III", "section iii": "III",
+        "section 3": "III", "section iii": "III",
         "section 4": "IV", "section iv": "IV"
     }
-
+
     # Chunk type keyword mapping
     chunk_type_keywords = {
         "approval": "approval_authority",
         "delegation": "delegation_summary",
-        "requirement": "requirement",
+        "requirement": "requirement",
         "method": "method_specific",
         "board": "board_approval",
         "committee": "composition"
     }
-
-    # Check for section filters
+
     for keyword, section in section_keywords.items():
         if keyword in question_lower:
             section_filter = section
             break
-
-    # Check for chunk type filters
+
     for keyword, chunk_type in chunk_type_keywords.items():
         if keyword in question_lower:
             chunk_type_filter = chunk_type
             break
-
+
     return section_filter, chunk_type_filter
 
+
 def clean_llm_response(raw_response: str) -> str:
-    """Clean and validate LLM response"""
+    """Simplified cleaner to avoid over-trimming."""
     if not raw_response:
         return ""
-
-    # Remove common unwanted patterns
-    cleaned = raw_response.strip()
-
-    # Remove incomplete sentences at the end
-    if cleaned and not cleaned.endswith(('.', '!', '?', ':', '|')):
-        # Find the last complete sentence
-        sentences = re.split(r'[.!?]', cleaned)
-        if len(sentences) > 1:
-            cleaned = '.'.join(sentences[:-1]) + '.'
-
-    return cleaned
+    return raw_response.strip()
+
 
 async def generate_llm_response(prompt: str, request_id: str, adapter: RequestIdAdapter):
-    """Improved LLM response generation with better error handling"""
+    """LLM response generation with safer stops and robust extraction."""
     loop = asyncio.get_running_loop()
-
-    # Multiple generation attempts with different parameters
+
+    # Use plain completion configs without fragile stop tokens
     generation_configs = [
-        {
-            "max_tokens": 512,
-            "temperature": 0.1,
-            "top_p": 0.9,
-            "repeat_penalty": 1.1,
-            "stop": ["</s>", "[INST]", "[/INST]", "Question:", "Context:", "###"]
-        },
-        {
-            "max_tokens": 256,
-            "temperature": 0.3,
-            "top_p": 0.8,
-            "repeat_penalty": 1.2,
-            "stop": ["</s>", "\n\n", "Question:", "Context:"]
-        },
-        {
-            "max_tokens": 128,
-            "temperature": 0.5,
-            "top_p": 0.7,
-            "repeat_penalty": 1.15,
-            "stop": ["</s>"]
-        }
+        {"max_tokens": 512, "temperature": 0.2, "top_p": 0.9, "repeat_penalty": 1.1, "stop": []},
+        {"max_tokens": 384, "temperature": 0.3, "top_p": 0.9, "repeat_penalty": 1.1, "stop": []},
+        {"max_tokens": 256, "temperature": 0.4, "top_p": 0.9, "repeat_penalty": 1.1, "stop": []},
     ]
-
+
     for attempt, config in enumerate(generation_configs, 1):
         try:
             adapter.info(f"LLM generation attempt {attempt}/{len(generation_configs)} with config: {config}")
-
+
             response = await loop.run_in_executor(
                 None,
                 lambda: llm(prompt, echo=False, **config)
            )
-
-            raw_answer = response["choices"][0]["text"]
+
+            # Debug: log a truncated snapshot of the raw response
+            try:
+                adapter.info(f"Raw LLM response object (truncated): {json.dumps(response)[:1200]}")
+            except Exception:
+                pass
+
+            raw_answer = ""
+            if isinstance(response, dict) and "choices" in response and response["choices"]:
+                choice = response["choices"][0]
+                if isinstance(choice, dict):
+                    raw_answer = choice.get("text") or choice.get("message", {}).get("content", "") or ""
+
             cleaned_answer = clean_llm_response(raw_answer)
-
             adapter.info(f"Attempt {attempt} - Raw response length: {len(raw_answer)}, Cleaned length: {len(cleaned_answer)}")
-
-            if cleaned_answer and len(cleaned_answer.strip()) > 10:  # Minimum meaningful response
+
+            # Accept concise answers
+            if cleaned_answer and len(cleaned_answer.strip()) > 3:
                adapter.info(f"Successful generation on attempt {attempt}")
                return cleaned_answer
            else:
                adapter.warning(f"Attempt {attempt} produced insufficient response: '{cleaned_answer}'")
-
        except Exception as e:
-            adapter.error(f"Attempt {attempt} failed: {e}")
+            adapter.error(f"Attempt {attempt} failed: {e}", exc_info=True)
            continue
-
-    # If all attempts fail, return a fallback message
+
    adapter.error("All LLM generation attempts failed")
    raise ValueError("Unable to generate a meaningful response after multiple attempts")
 
+
 # -----------------------------
 # ✅ Endpoints
 # -----------------------------
@@ -256,6 +251,7 @@ async def generate_llm_response(prompt: str, request_id: str, adapter: RequestId
 async def root():
     return {"status": "✅ Server is running."}
 
+
 @app.get("/health")
 async def health_check():
     status = {
@@ -268,6 +264,7 @@ async def health_check():
         raise HTTPException(status_code=503, detail=status)
     return status
 
+
 @app.post("/chat")
 async def chat(query: Query, request: Request):
     adapter = get_logger_adapter(request)
@@ -298,19 +295,19 @@ async def chat(query: Query, request: Request):
 
     # 1. Enhanced Search with potential filtering
     section_filter, chunk_type_filter = detect_filters(question_lower)
-
+
     if section_filter or chunk_type_filter:
         adapter.info(f"Detected filters - section: '{section_filter}', chunk_type: '{chunk_type_filter}'")
         search_results = db.search_with_filters(
-            query.question,
+            query.question,
             top_k=TOP_K_SEARCH,
             section_filter=section_filter,
             chunk_type_filter=chunk_type_filter
         )
-        adapter.info(f"Used filtered search")
+        adapter.info("Used filtered search")
     else:
         search_results = db.search(query.question, top_k=TOP_K_SEARCH)
-        adapter.info(f"Used regular search")
+        adapter.info("Used regular search")
 
     if not search_results:
         adapter.warning("No relevant context found in vector DB.")
@@ -325,7 +322,7 @@ async def chat(query: Query, request: Request):
     chunk_types = [result['metadata'].get('chunk_type', 'unknown') for result in search_results]
     sections = [result['metadata'].get('section', 'unknown') for result in search_results]
     scores = [f"{result['relevance_score']:.4f}" for result in search_results]
-
+
     adapter.info(f"Found {len(search_results)} relevant chunks")
     adapter.info(f"Chunk types: {chunk_types}")
     adapter.info(f"Sections: {sections}")
@@ -333,8 +330,6 @@ async def chat(query: Query, request: Request):
 
     # 3. Prioritize chunk types for better context selection
     prioritized_results = sorted(search_results, key=lambda x: (get_chunk_priority(x), -x['relevance_score']))
-
-    # Log prioritization results
     prioritized_types = [result['metadata'].get('chunk_type', 'unknown') for result in prioritized_results]
     adapter.info(f"Prioritized chunk types order: {prioritized_types}")
 
@@ -353,19 +348,25 @@ async def chat(query: Query, request: Request):
             'score': f"{result['relevance_score']:.4f}"
         }
         context_metadata.append(context_info)
-
-    adapter.info(f"Selected context metadata: {context_metadata}")
 
-    # 6. Build Improved Prompt for TinyLlama
-    prompt = f"""[INST] You are a helpful assistant for NEEPCO's Delegation of Powers policy. Answer the question using only the provided context.
-
-Context: {context}
-
-Question: {query.question}
+    adapter.info(f"Selected context metadata: {context_metadata}")
 
-Provide a clear, direct answer based only on the context above. If the context doesn't contain the information, say "The provided policy context does not contain information on this topic."
+    # 6. Build Plain Completion Prompt (no [INST] tags)
+    prompt = (
+        "You are a helpful assistant for NEEPCO's Delegation of Powers policy. "
+        "Answer the question using only the provided context.\n\n"
+        f"Context:\n{context}\n\n"
+        f"Question:\n{query.question}\n\n"
+        "Provide a clear, direct answer based only on the context above. If the context doesn't contain the information, "
+        "say \"The provided policy context does not contain information on this topic.\"\n\n"
+        "Answer:\n"
+    )
 
-Answer: [/INST]"""
+    # Optional: log a short preview of the prompt to debug future issues (safe/truncated)
+    try:
+        adapter.info(f"Prompt preview (first 400 chars): {prompt[:400].replace(chr(10),' ')}")
+    except Exception:
+        pass
 
     # 7. Generate Response
     answer = "An error occurred while processing your request."
@@ -375,7 +376,7 @@ Answer: [/INST]"""
             generate_llm_response(prompt, request.state.request_id, adapter),
             timeout=LLM_TIMEOUT_SECONDS
         )
-
+
         adapter.info(f"LLM generation successful. Response length: {len(raw_answer)}")
 
         # --- POST-PROCESSING LOGIC ---
@@ -394,7 +395,7 @@ Answer: [/INST]"""
         adapter.error(f"An unexpected error occurred during LLM generation: {e}", exc_info=True)
         answer = "Sorry, an unexpected error occurred while generating a response."
 
-    adapter.info(f"Final answer prepared. Returning to client.")
+    adapter.info("Final answer prepared. Returning to client.")
 
     return {
         "request_id": request.state.request_id,
@@ -403,23 +404,24 @@ Answer: [/INST]"""
         "answer": answer
     }
 
+
 @app.post("/advanced_search")
 async def advanced_search(query: AdvancedQuery, request: Request):
     """Advanced search endpoint with explicit filters"""
     adapter = get_logger_adapter(request)
-
+
     if not db_ready:
         raise HTTPException(status_code=503, detail="Database not ready")
-
+
     adapter.info(f"Advanced search: question='{query.question}', section='{query.section_filter}', chunk_type='{query.chunk_type_filter}'")
-
+
     search_results = db.search_with_filters(
         query.question,
         top_k=query.top_k or TOP_K_SEARCH,
         section_filter=query.section_filter,
         chunk_type_filter=query.chunk_type_filter
     )
-
+
     return {
         "request_id": request.state.request_id,
         "query": query.question,
@@ -437,6 +439,7 @@ async def advanced_search(query: AdvancedQuery, request: Request):
         ]
     }
 
+
 @app.post("/feedback")
 async def collect_feedback(feedback: Feedback, request: Request):
     adapter = get_logger_adapter(request)
@@ -451,4 +454,4 @@ async def collect_feedback(feedback: Feedback, request: Request):
     }
 
     adapter.info(json.dumps(feedback_log))
-    return {"status": "✅ Feedback recorded. Thank you!"}
+    return {"status": "✅ Feedback recorded. Thank you!"}