Spaces:
Running
on
Zero
Running
on
Zero
Lord-Raven
committed on
Commit
·
5deb965
1
Parent(s):
402f3c1
Trying ONNX models on CPU.
Browse files
- app.py +8 -4
- requirements.txt +1 -1
app.py
CHANGED
|
@@ -5,7 +5,8 @@ import json
|
|
| 5 |
import onnxruntime
|
| 6 |
import time
|
| 7 |
from datetime import datetime
|
| 8 |
-
from transformers import pipeline
|
|
|
|
| 9 |
from fastapi import FastAPI
|
| 10 |
from fastapi.middleware.cors import CORSMiddleware
|
| 11 |
|
|
@@ -24,10 +25,13 @@ print(f"Is CUDA available: {torch.cuda.is_available()}")
|
|
| 24 |
if torch.cuda.is_available():
|
| 25 |
print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
|
| 26 |
|
| 27 |
-
model_name = "MoritzLaurer/
|
| 28 |
-
tokenizer_name = "MoritzLaurer/
|
| 29 |
|
| 30 |
-
|
|
|
|
|
|
|
|
|
|
| 31 |
classifier_gpu = pipeline(task="zero-shot-classification", model=model_name, tokenizer=tokenizer_name, device="cuda:0") if torch.cuda.is_available() else classifier_cpu
|
| 32 |
|
| 33 |
def classify(data_string, request: gradio.Request):
|
|
|
|
| 5 |
import onnxruntime
|
| 6 |
import time
|
| 7 |
from datetime import datetime
|
| 8 |
+
from transformers import AutoTokenizer, pipeline
|
| 9 |
+
from optimum.onnxruntime import ORTModelForSequenceClassification
|
| 10 |
from fastapi import FastAPI
|
| 11 |
from fastapi.middleware.cors import CORSMiddleware
|
| 12 |
|
|
|
|
| 25 |
if torch.cuda.is_available():
|
| 26 |
print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
|
| 27 |
|
| 28 |
+
model_name = "MoritzLaurer/ModernBERT-large-zeroshot-v2.0"
|
| 29 |
+
tokenizer_name = "MoritzLaurer/ModernBERT-large-zeroshot-v2.0"
|
| 30 |
|
| 31 |
+
model_cpu = ORTModelForSequenceClassification.from_pretrained(model_id=model_name, subfolder="onnx", file_name="model_uint8.onnx")
|
| 32 |
+
tokenizer_cpu = AutoTokenizer.from_pretrained(tokenizer_name)
|
| 33 |
+
|
| 34 |
+
classifier_cpu = pipeline(task="zero-shot-classification", model=model_cpu, tokenizer=tokenizer_cpu)
|
| 35 |
classifier_gpu = pipeline(task="zero-shot-classification", model=model_name, tokenizer=tokenizer_name, device="cuda:0") if torch.cuda.is_available() else classifier_cpu
|
| 36 |
|
| 37 |
def classify(data_string, request: gradio.Request):
|
requirements.txt
CHANGED
|
@@ -5,4 +5,4 @@ json5==0.9.25
|
|
| 5 |
numpy
|
| 6 |
uvicorn
|
| 7 |
optimum[onnxruntime-gpu]==1.24.0
|
| 8 |
-
transformers==4.
|
|
|
|
| 5 |
numpy
|
| 6 |
uvicorn
|
| 7 |
optimum[onnxruntime-gpu]==1.24.0
|
| 8 |
+
transformers==4.48.0
|