Spaces:
Running
on
Zero
Running
on
Zero
Load model during startup for GPU reservation
Browse files
- __pycache__/app.cpython-313.pyc +0 -0
- app.py +10 -0
__pycache__/app.cpython-313.pyc
CHANGED
|
Binary files a/__pycache__/app.cpython-313.pyc and b/__pycache__/app.cpython-313.pyc differ
|
|
|
app.py
CHANGED
|
@@ -141,6 +141,16 @@ def healthcheck() -> dict[str, str]:
|
|
| 141 |
return {"status": "ok", "model": MODEL_ID}
|
| 142 |
|
| 143 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 144 |
@fastapi_app.post("/v1/generate", response_model=GenerateResponse)
|
| 145 |
def generate_endpoint(payload: GeneratePayload) -> GenerateResponse:
|
| 146 |
try:
|
|
|
|
| 141 |
return {"status": "ok", "model": MODEL_ID}
|
| 142 |
|
| 143 |
|
| 144 |
+
@fastapi_app.on_event("startup")
|
| 145 |
+
def warm_start() -> None:
|
| 146 |
+
"""Ensure the GPU reservation is established during startup."""
|
| 147 |
+
try:
|
| 148 |
+
get_model()
|
| 149 |
+
except Exception as exc:
|
| 150 |
+
# Surface the failure early so the container exits with a useful log.
|
| 151 |
+
raise RuntimeError(f"Model warm-up failed: {exc}") from exc
|
| 152 |
+
|
| 153 |
+
|
| 154 |
@fastapi_app.post("/v1/generate", response_model=GenerateResponse)
|
| 155 |
def generate_endpoint(payload: GeneratePayload) -> GenerateResponse:
|
| 156 |
try:
|