Spaces:
Running
on
Zero
Running
on
Zero
Restore spaces.GPU usage
Browse files
Files changed:
- README.md +1 -1
- __pycache__/app.cpython-313.pyc +0 -0
- app.py +14 -0
- requirements.txt +1 -0
README.md
CHANGED
|
@@ -21,7 +21,7 @@ endpoint via the `HF_ROUTER_API` environment variable.
|
|
| 21 |
| File | Purpose |
|
| 22 |
| ---- | ------- |
|
| 23 |
| `app.py` | Loads the merged checkpoint on demand (tries `MODEL_REPO` first, then `router-qwen3-32b-merged`, `router-gemma3-merged`), exposes a `/v1/generate` API, and serves a small HTML console at `/gradio`. |
|
| 24 |
-
| `requirements.txt` | Minimal dependency set (transformers, bitsandbytes, torch, fastapi). |
|
| 25 |
| `.huggingface/spaces.yml` | Configures the Space for ZeroGPU hardware and disables automatic sleep. |
|
| 26 |
|
| 27 |
## Deployment Steps
|
|
|
|
| 21 |
| File | Purpose |
|
| 22 |
| ---- | ------- |
|
| 23 |
| `app.py` | Loads the merged checkpoint on demand (tries `MODEL_REPO` first, then `router-qwen3-32b-merged`, `router-gemma3-merged`), exposes a `/v1/generate` API, and serves a small HTML console at `/gradio`. |
|
| 24 |
+
| `requirements.txt` | Minimal dependency set (transformers, bitsandbytes, torch, fastapi, spaces). |
|
| 25 |
| `.huggingface/spaces.yml` | Configures the Space for ZeroGPU hardware and disables automatic sleep. |
|
| 26 |
|
| 27 |
## Deployment Steps
|
__pycache__/app.cpython-313.pyc
CHANGED
|
Binary files a/__pycache__/app.cpython-313.pyc and b/__pycache__/app.cpython-313.pyc differ
|
|
|
app.py
CHANGED
|
@@ -9,6 +9,11 @@ from fastapi import FastAPI, HTTPException
|
|
| 9 |
from fastapi.responses import HTMLResponse
|
| 10 |
from pydantic import BaseModel
|
| 11 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
from transformers import (
|
| 13 |
AutoModelForCausalLM,
|
| 14 |
AutoTokenizer,
|
|
@@ -66,6 +71,15 @@ class GenerateResponse(BaseModel):
|
|
| 66 |
_MODEL = None
|
| 67 |
|
| 68 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
def get_model() -> AutoModelForCausalLM:
|
| 70 |
global _MODEL
|
| 71 |
if _MODEL is None:
|
|
|
|
| 9 |
from fastapi.responses import HTMLResponse
|
| 10 |
from pydantic import BaseModel
|
| 11 |
|
| 12 |
+
try:
|
| 13 |
+
import spaces # type: ignore
|
| 14 |
+
except Exception: # pragma: no cover
|
| 15 |
+
spaces = None
|
| 16 |
+
|
| 17 |
from transformers import (
|
| 18 |
AutoModelForCausalLM,
|
| 19 |
AutoTokenizer,
|
|
|
|
| 71 |
_MODEL = None
|
| 72 |
|
| 73 |
|
| 74 |
+
def _spaces_gpu(*args, **kwargs):
|
| 75 |
+
if spaces is None:
|
| 76 |
+
def identity(fn):
|
| 77 |
+
return fn
|
| 78 |
+
return identity
|
| 79 |
+
return spaces.GPU(*args, **kwargs)
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
@_spaces_gpu(duration=120)
|
| 83 |
def get_model() -> AutoModelForCausalLM:
|
| 84 |
global _MODEL
|
| 85 |
if _MODEL is None:
|
requirements.txt
CHANGED
|
@@ -1,5 +1,6 @@
|
|
| 1 |
bitsandbytes>=0.41.0
|
| 2 |
fastapi>=0.110.0
|
|
|
|
| 3 |
torch>=2.1.0
|
| 4 |
transformers>=4.40.0
|
| 5 |
uvicorn>=0.22.0
|
|
|
|
| 1 |
bitsandbytes>=0.41.0
|
| 2 |
fastapi>=0.110.0
|
| 3 |
+
spaces>=0.40.0
|
| 4 |
torch>=2.1.0
|
| 5 |
transformers>=4.40.0
|
| 6 |
uvicorn>=0.22.0
|