Kalpokoch committed · Commit ec6e541 · verified · 1 Parent(s): 6d60652

Update app/app.py

Files changed (1)
  1. app/app.py +35 -64
app/app.py CHANGED
@@ -1,72 +1,43 @@
-# === app.py ===
-import os
-import json
-import logging
-from fastapi import FastAPI, HTTPException
+from fastapi import FastAPI
 from pydantic import BaseModel
 from llama_cpp import Llama
-from app.policy_vector_db import PolicyVectorDB, ensure_db_populated
-
-logger = logging.getLogger("app")
-logging.basicConfig(level=logging.INFO)
+import os
+import requests

-app = FastAPI()
+from app.policy_vector_db import PolicyVectorDB, ensure_db_populated

-# Load the quantized model from disk
+MODEL_URL = "https://huggingface.co/Kalpokoch/QuantizedFineTunedPhi1.5/resolve/main/dop-phi-1.5-Q4_K_M.gguf"
 MODEL_PATH = "/app/dop-phi-1.5-Q4_K_M.gguf"
-if not os.path.exists(MODEL_PATH):
-    raise FileNotFoundError(f"Model file not found: {MODEL_PATH}")
-
-llm = Llama(
-    model_path=MODEL_PATH,
-    n_ctx=2048,
-    n_threads=4,
-    n_batch=8,
-    verbose=True
-)
-
-# Initialize vector DB
-VECTOR_DB_PATH = "/app/vector_database"
-CHUNKS_PATH = "processed_chunks.json"  # Make sure this exists
-vector_db = PolicyVectorDB(persist_directory=VECTOR_DB_PATH, relevance_threshold=0.0)
-ensure_db_populated(vector_db, CHUNKS_PATH)
-
-# Request format
-class QueryRequest(BaseModel):
-    query: str
-    top_k: int = 5
-
-@app.get("/")
-def read_root():
-    return {"status": "RAG Chatbot with quantized Phi-1.5 is running."}
-
-@app.post("/query")
-def query_policies(request: QueryRequest):
-    try:
-        logger.info(f"Received query: {request.query}")
-        search_results = vector_db.search(request.query, top_k=request.top_k)
-
-        context_str = "\n\n".join([r['text'] for r in search_results])
-
-        prompt = f"""You are a helpful assistant specialized in NEEPCO policies. Use the context below to answer the question.
-
-Context:
-{context_str}
-
-Question: {request.query}
-Answer:"""
+CHUNKS_PATH = "/app/processed_chunks.json"
+
+# Download the model if not already present
+def download_model():
+    if not os.path.exists(MODEL_PATH):
+        print("🔽 Downloading model...")
+        response = requests.get(MODEL_URL, stream=True)
+        with open(MODEL_PATH, "wb") as f:
+            for chunk in response.iter_content(chunk_size=8192):
+                f.write(chunk)
+        print("✅ Model downloaded.")
+
+download_model()
+
+# Initialize model and vector database
+llm = Llama(model_path=MODEL_PATH, n_ctx=2048)
+vector_db = PolicyVectorDB(CHUNKS_PATH)
+ensure_db_populated(vector_db)
+
+# FastAPI app setup
+app = FastAPI()

-        response = llm(prompt=prompt, max_tokens=512, temperature=0.2)
+class Query(BaseModel):
+    question: str

-        if isinstance(response, dict):
-            answer = response.get("choices", [{}])[0].get("text", "")
-        else:
-            answer = str(response)
+@app.post("/ask")
+async def ask_question(query: Query):
+    question = query.question
+    results = vector_db.query(question)
+    context_text = "\n".join([item["text"] for item in results])
+    prompt = f"Context:\n{context_text}\n\nQuestion: {question}\nAnswer:"

-        return {
-            "answer": answer.strip(),
-            "sources": search_results
-        }
-    except Exception as e:
-        logger.error(f"Query failed: {e}", exc_info=True)
-        raise HTTPException(status_code=500, detail=str(e))
+    output = llm(prompt=prompt, max_tokens_
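
The diff view is cut off mid-line at `max_tokens_`, so the tail of the new /ask handler is not visible here. A plausible completion, carrying over the max_tokens=512, temperature=0.2 call and the response handling from the removed /query endpoint, would continue the handler body like this (a sketch under those assumptions, not the committed code):

    # Hypothetical continuation of the truncated /ask handler body.
    # `llm` and `vector_db` are the module-level objects created earlier in app.py;
    # the generation parameters are reused from the removed /query endpoint.
    output = llm(prompt=prompt, max_tokens=512, temperature=0.2)

    # llama-cpp-python returns a completion dict: {"choices": [{"text": ...}], ...}
    if isinstance(output, dict):
        answer = output.get("choices", [{}])[0].get("text", "")
    else:
        answer = str(output)

    return {"answer": answer.strip(), "sources": results}

With a completion along those lines, a POST to /ask with a JSON body like {"question": "..."} would return the generated answer plus the retrieved source chunks, mirroring the response shape of the old /query endpoint.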
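One caveat in the new download_model(): it writes whatever the server returns straight to MODEL_PATH without checking the HTTP status, so a failed request can leave a corrupt file behind that the os.path.exists check will then skip on every later start. A slightly more defensive variant (a sketch, not part of this commit; the ".part" temp name is hypothetical) could be:

    import os
    import requests

    def download_model():
        # Nothing to do when the model file is already on disk.
        if os.path.exists(MODEL_PATH):
            return
        print("Downloading model...")
        response = requests.get(MODEL_URL, stream=True, timeout=60)
        response.raise_for_status()  # fail loudly on 4xx/5xx instead of saving an error page
        tmp_path = MODEL_PATH + ".part"
        with open(tmp_path, "wb") as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)
        os.rename(tmp_path, MODEL_PATH)  # expose the file only once it is complete
        print("Model downloaded.")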