Kalpokoch committed · Commit ec6e541 · verified · 1 Parent(s): 6d60652

Update app/app.py

Files changed (1)
  1. app/app.py +35 -64
app/app.py CHANGED
@@ -1,72 +1,43 @@
-# === app.py ===
-import os
-import json
-import logging
-from fastapi import FastAPI, HTTPException
+from fastapi import FastAPI
 from pydantic import BaseModel
 from llama_cpp import Llama
-from app.policy_vector_db import PolicyVectorDB, ensure_db_populated
-
-logger = logging.getLogger("app")
-logging.basicConfig(level=logging.INFO)
+import os
+import requests

-app = FastAPI()
+from app.policy_vector_db import PolicyVectorDB, ensure_db_populated

-# Load the quantized model from disk
+MODEL_URL = "https://huggingface.co/Kalpokoch/QuantizedFineTunedPhi1.5/resolve/main/dop-phi-1.5-Q4_K_M.gguf"
 MODEL_PATH = "/app/dop-phi-1.5-Q4_K_M.gguf"
-if not os.path.exists(MODEL_PATH):
-    raise FileNotFoundError(f"Model file not found: {MODEL_PATH}")
-
-llm = Llama(
-    model_path=MODEL_PATH,
-    n_ctx=2048,
-    n_threads=4,
-    n_batch=8,
-    verbose=True
-)
-
-# Initialize vector DB
-VECTOR_DB_PATH = "/app/vector_database"
-CHUNKS_PATH = "processed_chunks.json"  # Make sure this exists
-vector_db = PolicyVectorDB(persist_directory=VECTOR_DB_PATH, relevance_threshold=0.0)
-ensure_db_populated(vector_db, CHUNKS_PATH)
-
-# Request format
-class QueryRequest(BaseModel):
-    query: str
-    top_k: int = 5
-
-@app.get("/")
-def read_root():
-    return {"status": "RAG Chatbot with quantized Phi-1.5 is running."}
-
-@app.post("/query")
-def query_policies(request: QueryRequest):
-    try:
-        logger.info(f"Received query: {request.query}")
-        search_results = vector_db.search(request.query, top_k=request.top_k)
-
-        context_str = "\n\n".join([r['text'] for r in search_results])
-
-        prompt = f"""You are a helpful assistant specialized in NEEPCO policies. Use the context below to answer the question.
-
-Context:
-{context_str}
-
-Question: {request.query}
-Answer:"""
+CHUNKS_PATH = "/app/processed_chunks.json"
+
+# Download the model if not already present
+def download_model():
+    if not os.path.exists(MODEL_PATH):
+        print("🔽 Downloading model...")
+        response = requests.get(MODEL_URL, stream=True)
+        with open(MODEL_PATH, "wb") as f:
+            for chunk in response.iter_content(chunk_size=8192):
+                f.write(chunk)
+        print("✅ Model downloaded.")
+
+download_model()
+
+# Initialize model and vector database
+llm = Llama(model_path=MODEL_PATH, n_ctx=2048)
+vector_db = PolicyVectorDB(CHUNKS_PATH)
+ensure_db_populated(vector_db)
+
+# FastAPI app setup
+app = FastAPI()

-        response = llm(prompt=prompt, max_tokens=512, temperature=0.2)
+class Query(BaseModel):
+    question: str

-        if isinstance(response, dict):
-            answer = response.get("choices", [{}])[0].get("text", "")
-        else:
-            answer = str(response)
+@app.post("/ask")
+async def ask_question(query: Query):
+    question = query.question
+    results = vector_db.query(question)
+    context_text = "\n".join([item["text"] for item in results])
+    prompt = f"Context:\n{context_text}\n\nQuestion: {question}\nAnswer:"

-        return {
-            "answer": answer.strip(),
-            "sources": search_results
-        }
-    except Exception as e:
-        logger.error(f"Query failed: {e}", exc_info=True)
-        raise HTTPException(status_code=500, detail=str(e))
+    output = llm(prompt=prompt, max_tokens_
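
The diff view is cut off mid-line at `max_tokens_`, so the tail of the new /ask handler is not visible here. A plausible completion, carrying over the max_tokens=512, temperature=0.2 call and the response handling from the removed /query endpoint, would continue the handler body like this (a sketch under those assumptions, not the committed code):

    # Hypothetical continuation of the truncated /ask handler body.
    # `llm` and `vector_db` are the module-level objects created earlier in app.py;
    # the generation parameters are reused from the removed /query endpoint.
    output = llm(prompt=prompt, max_tokens=512, temperature=0.2)

    # llama-cpp-python returns a completion dict: {"choices": [{"text": ...}], ...}
    if isinstance(output, dict):
        answer = output.get("choices", [{}])[0].get("text", "")
    else:
        answer = str(output)

    return {"answer": answer.strip(), "sources": results}

With a completion along those lines, a POST to /ask with a JSON body like {"question": "..."} would return the generated answer plus the retrieved source chunks, mirroring the response shape of the old /query endpoint.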
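One caveat in the new download_model(): it writes whatever the server returns straight to MODEL_PATH without checking the HTTP status, so a failed request can leave a corrupt file behind that the os.path.exists check will then skip on every later start. A slightly more defensive variant (a sketch, not part of this commit; the ".part" temp name is hypothetical) could be:

    import os
    import requests

    def download_model():
        # Nothing to do when the model file is already on disk.
        if os.path.exists(MODEL_PATH):
            return
        print("Downloading model...")
        response = requests.get(MODEL_URL, stream=True, timeout=60)
        response.raise_for_status()  # fail loudly on 4xx/5xx instead of saving an error page
        tmp_path = MODEL_PATH + ".part"
        with open(tmp_path, "wb") as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)
        os.rename(tmp_path, MODEL_PATH)  # expose the file only once it is complete
        print("Model downloaded.")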