Kalpokoch committed
Commit 85c3f24 · verified · 1 Parent(s): f13ef99

Update app/app.py

Files changed (1)
  1. app/app.py +58 -262
app/app.py CHANGED
@@ -3,47 +3,39 @@ import json
 import asyncio
 import logging
 import uuid
+import re
 from fastapi import FastAPI, HTTPException, Request
 from pydantic import BaseModel
-from typing import Optional, Dict
 from llama_cpp import Llama
-
 # Correctly reference the module within the 'app' package
 from app.policy_vector_db import PolicyVectorDB, ensure_db_populated
 
-
 # -----------------------------
 # ✅ Logging Configuration
 # -----------------------------
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - [%(request_id)s] - %(message)s')
 
-
 class RequestIdAdapter(logging.LoggerAdapter):
     def process(self, msg, kwargs):
-        return '[%s] %s' % (self.extra.get('request_id', 'N/A'), msg), kwargs
-
+        return '[%s] %s' % (self.extra['request_id'], msg), kwargs
 
 logger = logging.getLogger("app")
 
-
 # -----------------------------
 # ✅ Configuration
 # -----------------------------
 DB_PERSIST_DIRECTORY = os.getenv("DB_PERSIST_DIRECTORY", "/app/vector_database")
 CHUNKS_FILE_PATH = os.getenv("CHUNKS_FILE_PATH", "/app/granular_chunks_final.jsonl")
 MODEL_PATH = os.getenv("MODEL_PATH", "/app/tinyllama_dop_q4_k_m.gguf")
-LLM_TIMEOUT_SECONDS = int(os.getenv("LLM_TIMEOUT_SECONDS", "90"))
+LLM_TIMEOUT_SECONDS = int(os.getenv("LLM_TIMEOUT_SECONDS", "90"))
 RELEVANCE_THRESHOLD = float(os.getenv("RELEVANCE_THRESHOLD", "0.3"))
 TOP_K_SEARCH = int(os.getenv("TOP_K_SEARCH", "3"))
 TOP_K_CONTEXT = int(os.getenv("TOP_K_CONTEXT", "1"))
-LLM_THREADS = int(os.getenv("LLM_THREADS", "4")) # configurable threads
-
 
 # -----------------------------
 # ✅ Initialize FastAPI App
 # -----------------------------
-app = FastAPI(title="NEEPCO DoP RAG Chatbot", version="2.1.1")
-
+app = FastAPI(title="NEEPCO DoP RAG Chatbot", version="2.1.0")
 
 @app.middleware("http")
 async def add_request_id(request: Request, call_next):
@@ -53,7 +45,6 @@ async def add_request_id(request: Request, call_next):
     response.headers["X-Request-ID"] = request_id
     return response
 
-
 # -----------------------------
 # ✅ Vector DB and Data Initialization
 # -----------------------------
@@ -64,33 +55,29 @@ try:
         top_k_default=TOP_K_SEARCH,
         relevance_threshold=RELEVANCE_THRESHOLD
     )
-
     if not ensure_db_populated(db, CHUNKS_FILE_PATH):
         logger.warning("DB not populated on startup. RAG will not function correctly.")
         db_ready = False
     else:
         logger.info("Vector DB is populated and ready.")
         db_ready = True
-
 except Exception as e:
     logger.error(f"FATAL: Failed to initialize Vector DB: {e}", exc_info=True)
     db = None
     db_ready = False
 
-
 # -----------------------------
-# ✅ Load TinyLlama GGUF Model with Safer Generation
+# ✅ Load TinyLlama GGUF Model
 # -----------------------------
 logger.info(f"Loading GGUF model from: {MODEL_PATH}")
 try:
     llm = Llama(
         model_path=MODEL_PATH,
         n_ctx=2048,
-        n_threads=LLM_THREADS,
-        n_batch=256,
+        n_threads=1,
+        n_batch=512,
         use_mlock=True,
-        verbose=False,
-        seed=42
+        verbose=False
     )
     logger.info("GGUF model loaded successfully.")
     model_ready = True
@@ -99,21 +86,12 @@ except Exception as e:
     llm = None
     model_ready = False
 
-
 # -----------------------------
 # ✅ API Schemas
 # -----------------------------
 class Query(BaseModel):
     question: str
 
-
-class AdvancedQuery(BaseModel):
-    question: str
-    section_filter: Optional[str] = None
-    chunk_type_filter: Optional[str] = None
-    top_k: Optional[int] = None
-
-
 class Feedback(BaseModel):
     request_id: str
     question: str
@@ -122,136 +100,16 @@ class Feedback(BaseModel):
     feedback: str
     comment: str | None = None
 
-
 # -----------------------------
-# ✅ Helper Functions
+# ✅ Endpoints
 # -----------------------------
 def get_logger_adapter(request: Request):
     return RequestIdAdapter(logger, {'request_id': getattr(request.state, 'request_id', 'N/A')})
 
-
-def get_chunk_priority(chunk: Dict) -> int:
-    """Assign priority to different chunk types for better context selection"""
-    priority_order = [
-        'approval_authority',
-        'delegation_summary',
-        'requirement',
-        'method_specific',
-        'board_approval',
-        'financial_concurrence',
-        'composition'
-    ]
-    chunk_type = chunk['metadata'].get('chunk_type', 'unknown')
-    try:
-        return priority_order.index(chunk_type)
-    except ValueError:
-        return len(priority_order) # Lower priority for unknown types
-
-
-def detect_filters(question_lower: str) -> tuple:
-    """Detect section and chunk type filters from user question"""
-    section_filter = None
-    chunk_type_filter = None
-
-    # Section keyword mapping
-    section_keywords = {
-        "annexure": "Annexure A",
-        "financial concurrence": "Financial Concurrence",
-        "guidelines": "Guidelines",
-        "section 1": "I", "section i": "I",
-        "section 2": "II", "section ii": "II",
-        "section 3": "III", "section iii": "III",
-        "section 4": "IV", "section iv": "IV"
-    }
-
-    # Chunk type keyword mapping
-    chunk_type_keywords = {
-        "approval": "approval_authority",
-        "delegation": "delegation_summary",
-        "requirement": "requirement",
-        "method": "method_specific",
-        "board": "board_approval",
-        "committee": "composition"
-    }
-
-    for keyword, section in section_keywords.items():
-        if keyword in question_lower:
-            section_filter = section
-            break
-
-    for keyword, chunk_type in chunk_type_keywords.items():
-        if keyword in question_lower:
-            chunk_type_filter = chunk_type
-            break
-
-    return section_filter, chunk_type_filter
-
-
-def clean_llm_response(raw_response: str) -> str:
-    """Simplified cleaner to avoid over-trimming."""
-    if not raw_response:
-        return ""
-    return raw_response.strip()
-
-
-async def generate_llm_response(prompt: str, request_id: str, adapter: RequestIdAdapter):
-    """LLM response generation with safer stops and robust extraction."""
-    loop = asyncio.get_running_loop()
-
-    # Use plain completion configs without fragile stop tokens
-    generation_configs = [
-        {"max_tokens": 512, "temperature": 0.2, "top_p": 0.9, "repeat_penalty": 1.1, "stop": []},
-        {"max_tokens": 384, "temperature": 0.3, "top_p": 0.9, "repeat_penalty": 1.1, "stop": []},
-        {"max_tokens": 256, "temperature": 0.4, "top_p": 0.9, "repeat_penalty": 1.1, "stop": []},
-    ]
-
-    for attempt, config in enumerate(generation_configs, 1):
-        try:
-            adapter.info(f"LLM generation attempt {attempt}/{len(generation_configs)} with config: {config}")
-
-            response = await loop.run_in_executor(
-                None,
-                lambda: llm(prompt, echo=False, **config)
-            )
-
-            # Debug: log a truncated snapshot of the raw response
-            try:
-                adapter.info(f"Raw LLM response object (truncated): {json.dumps(response)[:1200]}")
-            except Exception:
-                pass
-
-            raw_answer = ""
-            if isinstance(response, dict) and "choices" in response and response["choices"]:
-                choice = response["choices"][0]
-                if isinstance(choice, dict):
-                    raw_answer = choice.get("text") or choice.get("message", {}).get("content", "") or ""
-
-            cleaned_answer = clean_llm_response(raw_answer)
-            adapter.info(f"Attempt {attempt} - Raw response length: {len(raw_answer)}, Cleaned length: {len(cleaned_answer)}")
-
-            # Accept concise answers
-            if cleaned_answer and len(cleaned_answer.strip()) > 3:
-                adapter.info(f"Successful generation on attempt {attempt}")
-                return cleaned_answer
-            else:
-                adapter.warning(f"Attempt {attempt} produced insufficient response: '{cleaned_answer}'")
-
-        except Exception as e:
-            adapter.error(f"Attempt {attempt} failed: {e}", exc_info=True)
-            continue
-
-    adapter.error("All LLM generation attempts failed")
-    raise ValueError("Unable to generate a meaningful response after multiple attempts")
-
-
-# -----------------------------
-# ✅ Endpoints
-# -----------------------------
 @app.get("/")
 async def root():
     return {"status": "✅ Server is running."}
 
-
 @app.get("/health")
 async def health_check():
     status = {
@@ -259,17 +117,25 @@ async def health_check():
         "database_status": "ready" if db_ready else "error",
         "model_status": "ready" if model_ready else "error"
     }
-
     if not db_ready or not model_ready:
         raise HTTPException(status_code=503, detail=status)
     return status
 
+async def generate_llm_response(prompt: str, request_id: str):
+    loop = asyncio.get_running_loop()
+    response = await loop.run_in_executor(
+        None,
+        lambda: llm(prompt, max_tokens=1024, stop=["###", "Question:", "Context:", "</s>"], temperature=0.05, echo=False)
+    )
+    answer = response["choices"][0]["text"].strip()
+    if not answer:
+        raise ValueError("Empty response from LLM")
+    return answer
 
 @app.post("/chat")
 async def chat(query: Query, request: Request):
     adapter = get_logger_adapter(request)
-    question = query.question.strip()
-    question_lower = question.lower()
+    question_lower = query.question.strip().lower()
 
     # --- GREETING & INTRO HANDLING ---
     greeting_keywords = ["hello", "hi", "hey", "what can you do", "who are you"]
@@ -293,99 +159,67 @@
 
     adapter.info(f"Received query: '{query.question}'")
 
-    # 1. Enhanced Search with potential filtering
-    section_filter, chunk_type_filter = detect_filters(question_lower)
-
-    if section_filter or chunk_type_filter:
-        adapter.info(f"Detected filters - section: '{section_filter}', chunk_type: '{chunk_type_filter}'")
-        search_results = db.search_with_filters(
-            query.question,
-            top_k=TOP_K_SEARCH,
-            section_filter=section_filter,
-            chunk_type_filter=chunk_type_filter
-        )
-        adapter.info("Used filtered search")
-    else:
-        search_results = db.search(query.question, top_k=TOP_K_SEARCH)
-        adapter.info("Used regular search")
+    # 1. Search Vector DB
+    search_results = db.search(query.question, top_k=TOP_K_SEARCH)
 
     if not search_results:
         adapter.warning("No relevant context found in vector DB.")
         return {
-            "request_id": request.state.request_id,
             "question": query.question,
            "context_used": "No relevant context found.",
            "answer": "Sorry, I could not find a relevant policy to answer that question. Please try rephrasing."
         }
-
-    # 2. Enhanced logging of retrieved chunks
-    chunk_types = [result['metadata'].get('chunk_type', 'unknown') for result in search_results]
-    sections = [result['metadata'].get('section', 'unknown') for result in search_results]
+
     scores = [f"{result['relevance_score']:.4f}" for result in search_results]
+    adapter.info(f"Found {len(search_results)} relevant chunks with scores: {scores}")
 
-    adapter.info(f"Found {len(search_results)} relevant chunks")
-    adapter.info(f"Chunk types: {chunk_types}")
-    adapter.info(f"Sections: {sections}")
-    adapter.info(f"Relevance scores: {scores}")
-
-    # 3. Prioritize chunk types for better context selection
-    prioritized_results = sorted(search_results, key=lambda x: (get_chunk_priority(x), -x['relevance_score']))
-    prioritized_types = [result['metadata'].get('chunk_type', 'unknown') for result in prioritized_results]
-    adapter.info(f"Prioritized chunk types order: {prioritized_types}")
-
-    # 4. Prepare Context using prioritized results
-    context_chunks = [result['text'] for result in prioritized_results[:TOP_K_CONTEXT]]
+    # 2. Prepare Context
+    context_chunks = [result['text'] for result in search_results[:TOP_K_CONTEXT]]
     context = "\n---\n".join(context_chunks)
-
-    # 5. Enhanced context logging
-    context_metadata = []
-    for result in prioritized_results[:TOP_K_CONTEXT]:
-        metadata = result['metadata']
-        context_info = {
-            'section': metadata.get('section', 'unknown'),
-            'clause': metadata.get('clause', 'unknown'),
-            'chunk_type': metadata.get('chunk_type', 'unknown'),
-            'score': f"{result['relevance_score']:.4f}"
-        }
-        context_metadata.append(context_info)
-
-    adapter.info(f"Selected context metadata: {context_metadata}")
-
-    # 6. Build Plain Completion Prompt (no [INST] tags)
-    prompt = (
-        "You are a helpful assistant for NEEPCO's Delegation of Powers policy. "
-        "Answer the question using only the provided context.\n\n"
-        f"Context:\n{context}\n\n"
-        f"Question:\n{query.question}\n\n"
-        "Provide a clear, direct answer based only on the context above. If the context doesn't contain the information, "
-        "say \"The provided policy context does not contain information on this topic.\"\n\n"
-        "Answer:\n"
-    )
-
-    # Optional: log a short preview of the prompt to debug future issues (safe/truncated)
-    try:
-        adapter.info(f"Prompt preview (first 400 chars): {prompt[:400].replace(chr(10),' ')}")
-    except Exception:
-        pass
-
-    # 7. Generate Response
+
+    # 3. Build Prompt with Separator Instruction
+    prompt = f"""<|system|>
+You are a precise and factual assistant for NEEPCO's Delegation of Powers (DoP) policy.
+Your task is to answer the user's question based ONLY on the provided context.
+
+- **Formatting Rule:** If the answer contains a list of items or steps, you **MUST** separate each item with a pipe symbol (`|`). For example: `First item|Second item|Third item`.
+- **Content Rule:** If the information is not in the provided context, you **MUST** reply with the exact phrase: "The provided policy context does not contain information on this topic."
+</s>
+<|user|>
+### Relevant Context:
+```
+{context}
+```
+
+### Question:
+{query.question}
+</s>
+<|assistant|>
+### Detailed Answer:
+"""
+
+    # 4. Generate Response
     answer = "An error occurred while processing your request."
     try:
         adapter.info("Sending prompt to LLM for generation...")
         raw_answer = await asyncio.wait_for(
-            generate_llm_response(prompt, request.state.request_id, adapter),
+            generate_llm_response(prompt, request.state.request_id),
             timeout=LLM_TIMEOUT_SECONDS
         )
-
-        adapter.info(f"LLM generation successful. Response length: {len(raw_answer)}")
-
+        adapter.info(f"LLM generation successful. Raw response: {raw_answer[:250]}...")
+
         # --- POST-PROCESSING LOGIC ---
+        # Check if the model used the pipe separator, indicating a list.
         if '|' in raw_answer:
             adapter.info("Pipe separator found. Formatting response as a bulleted list.")
+            # Split the string into a list of items
            items = raw_answer.split('|')
+            # Clean up each item and format it as a bullet point
            cleaned_items = [f"* {item.strip()}" for item in items if item.strip()]
+            # Join them back together with newlines
            answer = "\n".join(cleaned_items)
        else:
+            # If no separator, use the answer as is.
            answer = raw_answer
 
     except asyncio.TimeoutError:
@@ -395,8 +229,7 @@ async def chat(query: Query, request: Request):
         adapter.error(f"An unexpected error occurred during LLM generation: {e}", exc_info=True)
         answer = "Sorry, an unexpected error occurred while generating a response."
 
-    adapter.info("Final answer prepared. Returning to client.")
-
+    adapter.info(f"Final answer prepared. Returning to client.")
     return {
         "request_id": request.state.request_id,
         "question": query.question,
@@ -404,42 +237,6 @@ async def chat(query: Query, request: Request):
         "answer": answer
     }
 
-
-@app.post("/advanced_search")
-async def advanced_search(query: AdvancedQuery, request: Request):
-    """Advanced search endpoint with explicit filters"""
-    adapter = get_logger_adapter(request)
-
-    if not db_ready:
-        raise HTTPException(status_code=503, detail="Database not ready")
-
-    adapter.info(f"Advanced search: question='{query.question}', section='{query.section_filter}', chunk_type='{query.chunk_type_filter}'")
-
-    search_results = db.search_with_filters(
-        query.question,
-        top_k=query.top_k or TOP_K_SEARCH,
-        section_filter=query.section_filter,
-        chunk_type_filter=query.chunk_type_filter
-    )
-
-    return {
-        "request_id": request.state.request_id,
-        "query": query.question,
-        "filters": {
-            "section": query.section_filter,
-            "chunk_type": query.chunk_type_filter
-        },
-        "results": [
-            {
-                "text": result['text'],
-                "metadata": result['metadata'],
-                "relevance_score": result['relevance_score']
-            }
-            for result in search_results
-        ]
-    }
-
-
 @app.post("/feedback")
 async def collect_feedback(feedback: Feedback, request: Request):
     adapter = get_logger_adapter(request)
@@ -452,6 +249,5 @@ async def collect_feedback(feedback: Feedback, request: Request):
         "feedback": feedback.feedback,
         "comment": feedback.comment
     }
-
     adapter.info(json.dumps(feedback_log))
     return {"status": "✅ Feedback recorded. Thank you!"}