Kalpokoch committed (verified)
Commit e6480ce · 1 Parent(s): 488fca2

Update app/app.py

Files changed (1)
  1. app/app.py +44 -32
app/app.py CHANGED
@@ -1,45 +1,57 @@
-from fastapi import FastAPI, Request
-from pydantic import BaseModel
 import os
 import logging
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel
 from app.policy_vector_db import PolicyVectorDB, ensure_db_populated
-from llama_cpp import Llama

-app = FastAPI()
+# --- GGUF Model Config ---
+MODEL_PATH = "dop-phi-1.5-Q4_K_M.gguf"
+MODEL_URL = (
+    "https://huggingface.co/Kalpokoch/QuantizedFineTunedPhi1.5/resolve/main/dop-phi-1.5-Q4_K_M.gguf"
+)
+
+# Logging setup
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger("app")

-# --- Load Vector DB ---
-VECTOR_DB_DIR = "/app/vector_database"
-CHUNKS_FILE = "processed_chunks.json"
-vector_db = PolicyVectorDB(persist_directory=VECTOR_DB_DIR)
-ensure_db_populated(vector_db, CHUNKS_FILE)
-
-# --- Load Quantized GGUF Model ---
-MODEL_PATH = "dop-phi-1.5-Q4_K_M.gguf"
+# --- Download model if not present ---
+def maybe_download_model():
+    if not os.path.exists(MODEL_PATH):
+        logger.info("Downloading GGUF model from Hugging Face...")
+        import requests
+
+        try:
+            with requests.get(MODEL_URL, stream=True) as r:
+                r.raise_for_status()
+                with open(MODEL_PATH, "wb") as f:
+                    for chunk in r.iter_content(chunk_size=8192):
+                        if chunk:
+                            f.write(chunk)
+            logger.info("Model downloaded successfully.")
+        except Exception as e:
+            logger.error(f"Failed to download GGUF model: {e}")
+            raise
+
+maybe_download_model()
+
+# Initialize FastAPI
+app = FastAPI()

-llm = Llama(
-    model_path=MODEL_PATH,
-    n_ctx=2048,
-    n_threads=os.cpu_count(),
-    use_mlock=False
-)
+# Initialize vector DB
+vector_db = PolicyVectorDB(persist_directory="vector_database", relevance_threshold=0.0)
+ensure_db_populated(vector_db, "processed_chunks.json")

+# Request schema
 class QueryRequest(BaseModel):
     query: str
+    top_k: int = 5

+# API endpoint
 @app.post("/query")
-async def query_policy(request: QueryRequest):
-    query_text = request.query
-    chunks = vector_db.search(query_text, top_k=5)
-
-    if not chunks:
-        return {"answer": "No relevant information found.", "chunks": []}
-
-    context = "\n\n".join([c['text'] for c in chunks])
-    prompt = f"Answer the question based on the context below:\n\nContext:\n{context}\n\nQuestion: {query_text}\nAnswer:"
-
-    response = llm(prompt, max_tokens=512, stop=["\n\n", "###"], echo=False)
-    answer = response["choices"][0]["text"].strip()
-
-    return {"answer": answer, "chunks": chunks}
+async def query_vector_db(req: QueryRequest):
+    try:
+        results = vector_db.search(query_text=req.query, top_k=req.top_k)
+        return {"results": results}
+    except Exception as e:
+        logger.error(f"Query error: {e}", exc_info=True)
+        raise HTTPException(status_code=500, detail="Internal server error")
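
For a quick smoke test of the updated /query endpoint, a minimal client sketch follows. It assumes the app is served locally with uvicorn (e.g. "uvicorn app.app:app --host 0.0.0.0 --port 8000"); the base URL, example query, and top_k value are illustrative assumptions, not part of the commit.

# Minimal client sketch for the new /query endpoint (not part of the commit).
# Assumes the API is running locally, e.g. started with:
#   uvicorn app.app:app --host 0.0.0.0 --port 8000
import requests

BASE_URL = "http://localhost:8000"  # assumed local deployment

payload = {
    "query": "What does the policy say about casual leave?",  # hypothetical query
    "top_k": 3,  # optional; the endpoint defaults to 5
}
resp = requests.post(f"{BASE_URL}/query", json=payload, timeout=60)
resp.raise_for_status()
print(resp.json()["results"])  # the endpoint wraps search hits in {"results": [...]}

Note that after this change the endpoint returns raw vector-search results rather than an LLM-generated answer: the llama_cpp inference step was removed, so the downloaded GGUF model is fetched but not loaded anywhere in this file.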