omaryasserhassan committed on
Commit
b5dfa0f
·
verified ·
1 Parent(s): 35678d4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -2
app.py CHANGED
@@ -6,8 +6,7 @@ from llama_cpp import Llama
6
 
7
  REPO_ID = "bartowski/Llama-3.2-3B-Instruct-GGUF"
8
  FILENAME = "Llama-3.2-3B-Instruct-Q4_K_M.gguf"
9
-
10
- CACHE_DIR = os.environ.get("HF_HOME", "/tmp/hf_cache")
11
  os.makedirs(CACHE_DIR, exist_ok=True)
12
 
13
  app = FastAPI()
@@ -17,6 +16,7 @@ def get_model():
17
  global _model
18
  if _model is not None:
19
  return _model
 
20
  local_path = hf_hub_download(
21
  repo_id=REPO_ID,
22
  filename=FILENAME,
@@ -50,3 +50,11 @@ def generate_text(req: PromptRequest):
50
  return {"ok": True, "response": output["choices"][0]["text"]}
51
  except Exception as e:
52
  raise HTTPException(status_code=500, detail=str(e))
 
 
 
 
 
 
 
 
 
6
 
7
  REPO_ID = "bartowski/Llama-3.2-3B-Instruct-GGUF"
8
  FILENAME = "Llama-3.2-3B-Instruct-Q4_K_M.gguf"
9
+ CACHE_DIR = "/app/models" # matches Dockerfile pre-download
 
10
  os.makedirs(CACHE_DIR, exist_ok=True)
11
 
12
  app = FastAPI()
 
16
  global _model
17
  if _model is not None:
18
  return _model
19
+
20
  local_path = hf_hub_download(
21
  repo_id=REPO_ID,
22
  filename=FILENAME,
 
50
  return {"ok": True, "response": output["choices"][0]["text"]}
51
  except Exception as e:
52
  raise HTTPException(status_code=500, detail=str(e))
53
+
54
+ @app.get("/health")
55
+ def health():
56
+ try:
57
+ _ = get_model()
58
+ return {"ok": True}
59
+ except Exception as e:
60
+ return {"ok": False, "error": str(e)}