Alovestocode committed on
Commit
6a8403c
·
verified ·
1 Parent(s): 00b5731

Load model during startup for GPU reservation

Browse files
Files changed (2) hide show
  1. __pycache__/app.cpython-313.pyc +0 -0
  2. app.py +10 -0
__pycache__/app.cpython-313.pyc CHANGED
Binary files a/__pycache__/app.cpython-313.pyc and b/__pycache__/app.cpython-313.pyc differ
 
app.py CHANGED
@@ -141,6 +141,16 @@ def healthcheck() -> dict[str, str]:
141
  return {"status": "ok", "model": MODEL_ID}
142
 
143
 
 
 
 
 
 
 
 
 
 
 
144
  @fastapi_app.post("/v1/generate", response_model=GenerateResponse)
145
  def generate_endpoint(payload: GeneratePayload) -> GenerateResponse:
146
  try:
 
141
  return {"status": "ok", "model": MODEL_ID}
142
 
143
 
144
+ @fastapi_app.on_event("startup")
145
+ def warm_start() -> None:
146
+ """Ensure the GPU reservation is established during startup."""
147
+ try:
148
+ get_model()
149
+ except Exception as exc:
150
+ # Surface the failure early so the container exits with a useful log.
151
+ raise RuntimeError(f"Model warm-up failed: {exc}") from exc
152
+
153
+
154
  @fastapi_app.post("/v1/generate", response_model=GenerateResponse)
155
  def generate_endpoint(payload: GeneratePayload) -> GenerateResponse:
156
  try: