Spaces:
Running
Running
Benjamin Consolvo
committed on
Commit
·
6128b93
1
Parent(s):
81fdd84
optimum intel
Browse files
- app.py +10 -6
- requirements.txt +2 -1
app.py
CHANGED
|
@@ -2,14 +2,18 @@ import gradio as gr
|
|
| 2 |
from transformers import pipeline
|
| 3 |
import time
|
| 4 |
|
| 5 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
# sparse_qa_pipeline = pipeline(task="question-answering",model="Intel/distilbert-base-uncased-squadv1.1-sparse-80-1x4-block-pruneofa-int8")
|
| 7 |
|
| 8 |
-
dense_qa_pipeline = pipeline(task="question-answering",model="csarron/bert-base-uncased-squad-v1")
|
| 9 |
-
|
| 10 |
|
| 11 |
-
def greet(name):
|
| 12 |
-
return "Hello " + name + "!!"
|
| 13 |
|
| 14 |
def predict(context,question):
|
| 15 |
'''
|
|
@@ -22,7 +26,7 @@ def predict(context,question):
|
|
| 22 |
'''
|
| 23 |
|
| 24 |
sparse_start_time = time.perf_counter()
|
| 25 |
-
sparse_predictions =
|
| 26 |
sparse_end_time = time.perf_counter()
|
| 27 |
sparse_duration = (sparse_end_time - sparse_start_time) * 1000
|
| 28 |
sparse_answer = sparse_predictions['answer']
|
|
|
|
| 2 |
from transformers import pipeline
|
| 3 |
import time
|
| 4 |
|
| 5 |
+
from optimum.intel.neural_compressor import IncQuantizedModelForQuestionAnswering
|
| 6 |
+
|
| 7 |
+
# model_id = "Intel/bert-base-uncased-squadv1.1-sparse-80-1x4-block-pruneofa"
|
| 8 |
+
model_id = "Intel/distilbert-base-uncased-squadv1.1-sparse-80-1x4-block-pruneofa-int8"
|
| 9 |
+
int8_model = IncQuantizedModelForQuestionAnswering.from_pretrained(model_id)
|
| 10 |
+
|
| 11 |
+
# sparse_qa_pipeline = pipeline(task="question-answering",model="Intel/bert-base-uncased-squadv1.1-sparse-80-1x4-block-pruneofa")
|
| 12 |
# sparse_qa_pipeline = pipeline(task="question-answering",model="Intel/distilbert-base-uncased-squadv1.1-sparse-80-1x4-block-pruneofa-int8")
|
| 13 |
|
| 14 |
+
# dense_qa_pipeline = pipeline(task="question-answering",model="csarron/bert-base-uncased-squad-v1")
|
| 15 |
+
dense_qa_pipeline = pipeline(task="question-answering",model="distilbert-base-uncased-distilled-squad")
|
| 16 |
|
|
|
|
|
|
|
| 17 |
|
| 18 |
def predict(context,question):
|
| 19 |
'''
|
|
|
|
| 26 |
'''
|
| 27 |
|
| 28 |
sparse_start_time = time.perf_counter()
|
| 29 |
+
sparse_predictions = int8_model(context=context,question=question)
|
| 30 |
sparse_end_time = time.perf_counter()
|
| 31 |
sparse_duration = (sparse_end_time - sparse_start_time) * 1000
|
| 32 |
sparse_answer = sparse_predictions['answer']
|
requirements.txt
CHANGED
|
@@ -1,3 +1,4 @@
|
|
| 1 |
transformers
|
| 2 |
torch
|
| 3 |
-
tensorflow
|
|
|
|
|
|
| 1 |
transformers
|
| 2 |
torch
|
| 3 |
+
tensorflow
|
| 4 |
+
optimum.intel
|