Kalpokoch committed (verified)
Commit 95eb732 · 1 Parent(s): 9d6d993

Update app/app.py

Files changed (1)
  1. app/app.py +14 -27
app/app.py CHANGED
@@ -1,13 +1,8 @@
-# =======================
-# ⚡ GGUF + llama-cpp-python FastAPI App for HF Spaces (CPU Optimized)
-# =======================
-
 from fastapi import FastAPI
 from pydantic import BaseModel
 from llama_cpp import Llama
-import os
 import logging
-from typing import Optional
+import os
 from app.policy_vector_db import PolicyVectorDB, ensure_db_populated
 
 # -----------------------------
@@ -21,15 +16,12 @@ logger = logging.getLogger("app")
 # -----------------------------
 app = FastAPI()
 
-# -----------------------------
-# ✅ Health Check Endpoint
-# -----------------------------
 @app.get("/")
 async def root():
     return {"status": "✅ Server is running and ready."}
 
 # -----------------------------
-# ✅ Feedback Collection Endpoint
+# ✅ Feedback Collection
 # -----------------------------
 class Feedback(BaseModel):
     question: str
@@ -51,16 +43,15 @@ logger.info("[INFO] Initializing vector DB...")
 db = PolicyVectorDB(
     persist_directory=DB_PERSIST_DIRECTORY,
     top_k_default=7,
-    relevance_threshold=0.45  # Lowered for broader matching
+    relevance_threshold=0.45  # Lowered for broader retrieval
 )
-
 if not ensure_db_populated(db, CHUNKS_FILE_PATH):
     logger.warning("[WARNING] DB not populated. Chunks file may be missing.")
 else:
     logger.info("[INFO] Vector DB ready.")
 
 # -----------------------------
-# ✅ Load GGUF Model with llama-cpp-python
+# ✅ Load GGUF Model (llama-cpp-python)
 # -----------------------------
 MODEL_PATH = "/app/tinyllama_dop_q4_k_m.gguf"
 
@@ -76,7 +67,7 @@ llm = Llama(
 logger.info("[INFO] Model loaded successfully.")
 
 # -----------------------------
-# ✅ Chat Schema
+# ✅ Query Schema
 # -----------------------------
 class Query(BaseModel):
     question: str
@@ -86,27 +77,23 @@ class Query(BaseModel):
 # -----------------------------
 @app.post("/chat")
 async def chat(query: Query):
-    question = query.question
+    question = query.question.strip()
     logger.info(f"[QUERY] {question}")
 
-    # Step 1: Retrieve from Vector DB
     search_results = db.search(question)
+    filtered = [r for r in search_results if r["relevance_score"] > db.relevance_threshold]
 
-    # Step 2: Filter by relevance threshold
-    context_chunks = [res for res in search_results if res["relevance_score"] > db.relevance_threshold]
-
-    # Debug: log all retrieved chunks with scores
+    # Logging for debug
     for i, r in enumerate(search_results):
-        logger.info(f"[DEBUG] Chunk {i+1} Score: {r['relevance_score']:.4f} | Snippet: {r['text'][:80]}")
+        logger.info(f"[DEBUG] Chunk {i+1} | Score: {r['relevance_score']:.4f} | Snippet: {r['text'][:80]}")
 
-    # Step 3: Fallback if none passed threshold
-    if not context_chunks:
+    if not filtered:
         logger.warning("[WARN] No relevant context passed threshold. Using top-2 chunks as fallback.")
-        context_chunks = search_results[:2]  # fallback even if less relevant
+        filtered = search_results[:2]
 
-    context = "\n".join([res["text"] for res in context_chunks])
+    context = "\n".join([r["text"] for r in filtered]) or "No relevant context found."
 
-    # Step 4: Construct prompt
+    # Prompt Template
     prompt = f"""You are a helpful assistant trained on NEEPCO Delegation of Powers policies.
 
 ### Relevant Context:
@@ -117,7 +104,7 @@ async def chat(query: Query):
 
 ### Answer:"""
 
-    # Step 5: Inference
+    # Run LLM
     response = llm(prompt, max_tokens=200, stop=["###"], temperature=0.2)
     answer = response["choices"][0]["text"].strip()
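
After this change, /chat filters the retrieved chunks by db.relevance_threshold, falls back to the top-2 results when nothing passes, and prompts the GGUF model through llama-cpp-python. Below is a minimal sketch of calling the endpoint on a locally running copy of the Space; the port, sample question, and JSON response shape are assumptions rather than anything shown in this commit.

# Hypothetical client call; assumes the app listens on port 7860 and that
# /chat returns JSON containing the generated answer (not shown in this diff).
import requests

payload = {"question": "What approvals are required for works contracts?"}  # sample question
resp = requests.post("http://localhost:7860/chat", json=payload, timeout=120)
resp.raise_for_status()
print(resp.json())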