Kalpokoch committed (verified)
Commit 95eb732 · 1 Parent(s): 9d6d993

Update app/app.py

Files changed (1)
  1. app/app.py +14 -27
app/app.py CHANGED
@@ -1,13 +1,8 @@
-# =======================
-# ⚡ GGUF + llama-cpp-python FastAPI App for HF Spaces (CPU Optimized)
-# =======================
-
 from fastapi import FastAPI
 from pydantic import BaseModel
 from llama_cpp import Llama
-import os
 import logging
-from typing import Optional
+import os
 from app.policy_vector_db import PolicyVectorDB, ensure_db_populated
 
 # -----------------------------
@@ -21,15 +16,12 @@ logger = logging.getLogger("app")
 # -----------------------------
 app = FastAPI()
 
-# -----------------------------
-# ✅ Health Check Endpoint
-# -----------------------------
 @app.get("/")
 async def root():
     return {"status": "✅ Server is running and ready."}
 
 # -----------------------------
-# ✅ Feedback Collection Endpoint
+# ✅ Feedback Collection
 # -----------------------------
 class Feedback(BaseModel):
     question: str
@@ -51,16 +43,15 @@ logger.info("[INFO] Initializing vector DB...")
 db = PolicyVectorDB(
     persist_directory=DB_PERSIST_DIRECTORY,
     top_k_default=7,
-    relevance_threshold=0.45  # Lowered for broader matching
+    relevance_threshold=0.45  # Lowered for broader retrieval
 )
-
 if not ensure_db_populated(db, CHUNKS_FILE_PATH):
     logger.warning("[WARNING] DB not populated. Chunks file may be missing.")
 else:
     logger.info("[INFO] Vector DB ready.")
 
 # -----------------------------
-# ✅ Load GGUF Model with llama-cpp-python
+# ✅ Load GGUF Model (llama-cpp-python)
 # -----------------------------
 MODEL_PATH = "/app/tinyllama_dop_q4_k_m.gguf"
 
@@ -76,7 +67,7 @@ llm = Llama(
 logger.info("[INFO] Model loaded successfully.")
 
 # -----------------------------
-# ✅ Chat Schema
+# ✅ Query Schema
 # -----------------------------
 class Query(BaseModel):
     question: str
@@ -86,27 +77,23 @@ class Query(BaseModel):
 # -----------------------------
 @app.post("/chat")
 async def chat(query: Query):
-    question = query.question
+    question = query.question.strip()
     logger.info(f"[QUERY] {question}")
 
-    # Step 1: Retrieve from Vector DB
     search_results = db.search(question)
+    filtered = [r for r in search_results if r["relevance_score"] > db.relevance_threshold]
 
-    # Step 2: Filter by relevance threshold
-    context_chunks = [res for res in search_results if res["relevance_score"] > db.relevance_threshold]
-
-    # Debug: log all retrieved chunks with scores
+    # Logging for debug
     for i, r in enumerate(search_results):
-        logger.info(f"[DEBUG] Chunk {i+1} Score: {r['relevance_score']:.4f} | Snippet: {r['text'][:80]}")
+        logger.info(f"[DEBUG] Chunk {i+1} | Score: {r['relevance_score']:.4f} | Snippet: {r['text'][:80]}")
 
-    # Step 3: Fallback if none passed threshold
-    if not context_chunks:
+    if not filtered:
         logger.warning("[WARN] No relevant context passed threshold. Using top-2 chunks as fallback.")
-        context_chunks = search_results[:2]  # fallback even if less relevant
+        filtered = search_results[:2]
 
-    context = "\n".join([res["text"] for res in context_chunks])
+    context = "\n".join([r["text"] for r in filtered]) or "No relevant context found."
 
-    # Step 4: Construct prompt
+    # Prompt Template
     prompt = f"""You are a helpful assistant trained on NEEPCO Delegation of Powers policies.
 
 ### Relevant Context:
@@ -117,7 +104,7 @@ async def chat(query: Query):
 
 ### Answer:"""
 
-    # Step 5: Inference
+    # Run LLM
     response = llm(prompt, max_tokens=200, stop=["###"], temperature=0.2)
     answer = response["choices"][0]["text"].strip()
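
After this change, /chat filters the retrieved chunks by db.relevance_threshold, falls back to the top-2 results when nothing passes, and prompts the GGUF model through llama-cpp-python. Below is a minimal sketch of calling the endpoint on a locally running copy of the Space; the port, sample question, and JSON response shape are assumptions rather than anything shown in this commit.

# Hypothetical client call; assumes the app listens on port 7860 and that
# /chat returns JSON containing the generated answer (not shown in this diff).
import requests

payload = {"question": "What approvals are required for works contracts?"}  # sample question
resp = requests.post("http://localhost:7860/chat", json=payload, timeout=120)
resp.raise_for_status()
print(resp.json())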