Spaces:

sentence-transformers
/

backend-export

Running on T4

App Files Files Community

Tom Aarsen committed on Aug 12

Commit

3921dd6

1 Parent(s): eded98b

Add SparseEncoder & CrossEncoder support to backend-export

Browse files

Files changed (5) hide show

README.md +1 -1
app.py +490 -112
images/backends_benchmark_cpu.png +0 -0
images/backends_benchmark_gpu.png +0 -0
requirements.txt +3 -3

README.md CHANGED Viewed

@@ -4,7 +4,7 @@ emoji: ⚙️
 colorFrom: indigo
 colorTo: indigo
 sdk: gradio
-sdk_version: 5.5.0
 app_file: app.py
 pinned: false
 license: apache-2.0

 colorFrom: indigo
 colorTo: indigo
 sdk: gradio
+sdk_version: 5.42.0
 app_file: app.py
 pinned: false
 license: apache-2.0

app.py CHANGED Viewed

@@ -1,18 +1,30 @@
 from enum import Enum
-from functools import partial
 from pathlib import Path
 from typing import Optional, Tuple
 import gradio as gr
 from gradio_huggingfacehub_search import HuggingfaceHubSearch
 import huggingface_hub
-from sentence_transformers import SentenceTransformer
 from sentence_transformers import (
     export_dynamic_quantized_onnx_model as st_export_dynamic_quantized_onnx_model,
     export_optimized_onnx_model as st_export_optimized_onnx_model,
     export_static_quantized_openvino_model as st_export_static_quantized_openvino_model,
 )
-from huggingface_hub import model_info, upload_folder, get_repo_discussions, list_repo_commits, HfFileSystem
-from huggingface_hub.errors import RepositoryNotFoundError
 from optimum.intel import OVQuantizationConfig
 from tempfile import TemporaryDirectory
@@ -29,9 +41,20 @@ class Backend(Enum):
         return self.value
 backends = [str(backend) for backend in Backend]
 FILE_SYSTEM = HfFileSystem()
 def is_new_model(model_id: str) -> bool:
     """
     Check if the model ID exists on the Hugging Face Hub. If we get a request error, then we
@@ -50,12 +73,59 @@ def is_sentence_transformer_model(model_id: str) -> bool:
     return "sentence-transformers" in model_info(model_id).tags
 def get_last_commit(model_id: str) -> str:
     """
     Build the Hugging Face Hub URL of the last commit of the model ID.

     The commit is the first entry returned by ``list_repo_commits(model_id)``
     (presumably the most recent one; confirm against the huggingface_hub docs).

     Args:
         model_id: Repository ID, interpolated into the URL as-is.

     Returns:
         A URL of the form ``https://huggingface.co/{model_id}/commit/{commit_id}``;
         note that this is a full URL, not a bare commit hash.
     """
     return f"https://huggingface.co/{model_id}/commit/{list_repo_commits(model_id)[0].commit_id}"
 def get_last_pr(model_id: str) -> Tuple[str, int]:
     """
     Return the URL and number of the first discussion listed for the model ID.

     NOTE(review): the first item yielded by ``get_repo_discussions`` is taken
     without checking that it is a pull request; presumably the listing is
     newest-first and the caller has just opened a PR. Confirm.

     Args:
         model_id: Repository ID whose discussions are listed.

     Returns:
         A ``(url, num)`` tuple read from the first yielded discussion.

     Raises:
         StopIteration: If ``get_repo_discussions`` yields nothing.
     """
     last_pr = next(get_repo_discussions(model_id))
     return last_pr.url, last_pr.num
@@ -80,12 +150,25 @@ def export_to_torch(model_id, create_pr, output_model_id):
     )
-def export_to_onnx(model_id: str, create_pr: bool, output_model_id: str, token: Optional[str] = None) -> None:
     if does_file_glob_exist(output_model_id, "**/model.onnx"):
         raise FileExistsError("An ONNX model already exists in the repository")
-    model = SentenceTransformer(model_id, backend="onnx")
     commit_message = "Add exported onnx model 'model.onnx'"
     if is_new_model(output_model_id):
@@ -110,22 +193,27 @@ Hello!
 ## Tip:
 Consider testing this pull request before merging by loading the model from this PR with the `revision` argument:
 ```python
-from sentence_transformers import SentenceTransformer
 # TODO: Fill in the PR number
 pr_number = 2
-model = SentenceTransformer(
     "{output_model_id}",
     revision=f"refs/pr/{{pr_number}}",
     backend="onnx",
 )
 # Verify that everything works as expected
-embeddings = model.encode(["The weather is lovely today.", "It's so sunny outside!", "He drove to the stadium."])
 print(embeddings.shape)
 similarities = model.similarity(embeddings, embeddings)
-print(similarities)
 ```
 """
@@ -139,16 +227,24 @@ print(similarities)
                 token=token,
             )
-def export_to_onnx_snippet(model_id: str, create_pr: bool, output_model_id: str) -> str:
-    return """\
 pip install sentence_transformers[onnx-gpu]
 # or
 pip install sentence_transformers[onnx]
-""", f"""\
-from sentence_transformers import SentenceTransformer
 # 1. Load the model to be exported with the ONNX backend
-model = SentenceTransformer(
     "{model_id}",
     backend="onnx",
 )
@@ -160,31 +256,60 @@ model = SentenceTransformer(
     "{output_model_id}",
     create_pr=True,
 )'''}
-""", f"""\
-from sentence_transformers import SentenceTransformer
 # 1. Load the model from the Hugging Face Hub
 # (until merged) Use the `revision` argument to load the model from the PR
 pr_number = 2
-model = SentenceTransformer(
     "{output_model_id}",
     revision=f"refs/pr/{{pr_number}}",
     backend="onnx",
 )
 # 2. Inference works as normal
 embeddings = model.encode(["The weather is lovely today.", "It's so sunny outside!", "He drove to the stadium."])
 similarities = model.similarity(embeddings, embeddings)
 """
 def export_to_onnx_dynamic_quantization(
-    model_id: str, create_pr: bool, output_model_id: str, onnx_quantization_config: str, token: Optional[str] = None
 ) -> None:
-    if does_file_glob_exist(output_model_id, f"onnx/model_qint8_{onnx_quantization_config}.onnx"):
-        raise FileExistsError("The quantized ONNX model already exists in the repository")
-    model = SentenceTransformer(model_id, backend="onnx")
     if not create_pr and is_new_model(output_model_id):
         model.push_to_hub(repo_id=output_model_id, token=token)
@@ -202,7 +327,20 @@ def export_to_onnx_dynamic_quantization(
         )
     except ValueError:
         # Currently, quantization with optimum has some issues if there's already an ONNX model in a subfolder
-        model = SentenceTransformer(model_id, backend="onnx", model_kwargs={"export": True})
         st_export_dynamic_quantized_onnx_model(
             model,
             quantization_config=onnx_quantization_config,
@@ -213,21 +351,31 @@ def export_to_onnx_dynamic_quantization(
     finally:
         huggingface_hub.upload_folder = original_upload_folder
 def export_to_onnx_dynamic_quantization_snippet(
-    model_id: str, create_pr: bool, output_model_id: str, onnx_quantization_config: str
-) -> str:
-    return """\
 pip install sentence_transformers[onnx-gpu]
 # or
 pip install sentence_transformers[onnx]
-""", f"""\
 from sentence_transformers import (
-    SentenceTransformer,
     export_dynamic_quantized_onnx_model,
 )
-# 1. Load the model to be quantized with the ONNX backend
-model = SentenceTransformer(
     "{model_id}",
     backend="onnx",
 )
@@ -240,29 +388,61 @@ export_dynamic_quantized_onnx_model(
     push_to_hub=True,
 {'''    create_pr=True,
 ''' if create_pr else ''})
-""", f"""\
-from sentence_transformers import SentenceTransformer
 # 1. Load the model from the Hugging Face Hub
 # (until merged) Use the `revision` argument to load the model from the PR
 pr_number = 2
-model = SentenceTransformer(
     "{output_model_id}",
     revision=f"refs/pr/{{pr_number}}",
     backend="onnx",
     model_kwargs={{"file_name": "model_qint8_{onnx_quantization_config}.onnx"}},
 )
 # 2. Inference works as normal
 embeddings = model.encode(["The weather is lovely today.", "It's so sunny outside!", "He drove to the stadium."])
 similarities = model.similarity(embeddings, embeddings)
 """
-def export_to_onnx_optimization(model_id: str, create_pr: bool, output_model_id: str, onnx_optimization_config: str, token: Optional[str] = None) -> None:
-    if does_file_glob_exist(output_model_id, f"onnx/model_{onnx_optimization_config}.onnx"):
-        raise FileExistsError("The optimized ONNX model already exists in the repository")
-    model = SentenceTransformer(model_id, backend="onnx")
     if not create_pr and is_new_model(output_model_id):
         model.push_to_hub(repo_id=output_model_id, token=token)
@@ -281,19 +461,31 @@ def export_to_onnx_optimization(model_id: str, create_pr: bool, output_model_id:
     finally:
         huggingface_hub.upload_folder = original_upload_folder
-def export_to_onnx_optimization_snippet(model_id: str, create_pr: bool, output_model_id: str, onnx_optimization_config: str) -> str:
-    return """\
 pip install sentence_transformers[onnx-gpu]
 # or
 pip install sentence_transformers[onnx]
-""", f"""\
 from sentence_transformers import (
-    SentenceTransformer,
     export_optimized_onnx_model,
 )
 # 1. Load the model to be optimized with the ONNX backend
-model = SentenceTransformer(
     "{model_id}",
     backend="onnx",
 )
@@ -306,30 +498,56 @@ export_optimized_onnx_model(
     push_to_hub=True,
 {'''    create_pr=True,
 ''' if create_pr else ''})
-""", f"""\
-from sentence_transformers import SentenceTransformer
 # 1. Load the model from the Hugging Face Hub
 # (until merged) Use the `revision` argument to load the model from the PR
 pr_number = 2
-model = SentenceTransformer(
     "{output_model_id}",
     revision=f"refs/pr/{{pr_number}}",
     backend="onnx",
     model_kwargs={{"file_name": "model_{onnx_optimization_config}.onnx"}},
 )
 # 2. Inference works as normal
 embeddings = model.encode(["The weather is lovely today.", "It's so sunny outside!", "He drove to the stadium."])
 similarities = model.similarity(embeddings, embeddings)
 """
-def export_to_openvino(model_id: str, create_pr: bool, output_model_id: str, token: Optional[str] = None) -> None:
     if does_file_glob_exist(output_model_id, "**/openvino_model.xml"):
         raise FileExistsError("The OpenVINO model already exists in the repository")
-    model = SentenceTransformer(model_id, backend="openvino")
     commit_message = "Add exported openvino model 'openvino_model.xml'"
@@ -355,22 +573,27 @@ Hello!
 ## Tip:
 Consider testing this pull request before merging by loading the model from this PR with the `revision` argument:
 ```python
-from sentence_transformers import SentenceTransformer
 # TODO: Fill in the PR number
 pr_number = 2
-model = SentenceTransformer(
     "{output_model_id}",
     revision=f"refs/pr/{{pr_number}}",
     backend="openvino",
 )
 # Verify that everything works as expected
-embeddings = model.encode(["The weather is lovely today.", "It's so sunny outside!", "He drove to the stadium."])
 print(embeddings.shape)
 similarities = model.similarity(embeddings, embeddings)
-print(similarities)
 ```
 """
@@ -384,14 +607,22 @@ print(similarities)
                 token=token,
             )
-def export_to_openvino_snippet(model_id: str, create_pr: bool, output_model_id: str) -> str:
-    return """\
 pip install sentence_transformers[openvino]
-""", f"""\
-from sentence_transformers import SentenceTransformer
 # 1. Load the model to be exported with the OpenVINO backend
-model = SentenceTransformer(
     "{model_id}",
     backend="openvino",
 )
@@ -403,25 +634,40 @@ model = SentenceTransformer(
     "{output_model_id}",
     create_pr=True,
 )'''}
-""", f"""\
-from sentence_transformers import SentenceTransformer
 # 1. Load the model from the Hugging Face Hub
 # (until merged) Use the `revision` argument to load the model from the PR
 pr_number = 2
-model = SentenceTransformer(
     "{output_model_id}",
     revision=f"refs/pr/{{pr_number}}",
     backend="openvino",
 )
 # 2. Inference works as normal
 embeddings = model.encode(["The weather is lovely today.", "It's so sunny outside!", "He drove to the stadium."])
 similarities = model.similarity(embeddings, embeddings)
 """
 def export_to_openvino_static_quantization(
     model_id: str,
     create_pr: bool,
     output_model_id: str,
     ov_quant_dataset_name: str,
@@ -431,10 +677,21 @@ def export_to_openvino_static_quantization(
     ov_quant_dataset_num_samples: int,
     token: Optional[str] = None,
 ) -> None:
-    if does_file_glob_exist(output_model_id, "openvino/openvino_model_qint8_quantized.xml"):
-        raise FileExistsError("The quantized OpenVINO model already exists in the repository")
-    model = SentenceTransformer(model_id, backend="openvino")
     if not create_pr and is_new_model(output_model_id):
         model.push_to_hub(repo_id=output_model_id, token=token)
@@ -459,8 +716,10 @@ def export_to_openvino_static_quantization(
     finally:
         huggingface_hub.upload_folder = original_upload_folder
 def export_to_openvino_static_quantization_snippet(
     model_id: str,
     create_pr: bool,
     output_model_id: str,
     ov_quant_dataset_name: str,
@@ -468,18 +727,23 @@ def export_to_openvino_static_quantization_snippet(
     ov_quant_dataset_split: str,
     ov_quant_dataset_column_name: str,
     ov_quant_dataset_num_samples: int,
-) -> str:
-    return """\
 pip install sentence_transformers[openvino]
-""", f"""\
 from sentence_transformers import (
-    SentenceTransformer,
     export_static_quantized_openvino_model,
 )
 from optimum.intel import OVQuantizationConfig
 # 1. Load the model to be quantized with the OpenVINO backend
-model = SentenceTransformer(
     "{model_id}",
     backend="openvino",
 )
@@ -498,23 +762,37 @@ export_static_quantized_openvino_model(
     push_to_hub=True,
 {'''    create_pr=True,
 ''' if create_pr else ''})
-""", f"""\
-from sentence_transformers import SentenceTransformer
 # 1. Load the model from the Hugging Face Hub
 # (until merged) Use the `revision` argument to load the model from the PR
 pr_number = 2
-model = SentenceTransformer(
     "{output_model_id}",
     revision=f"refs/pr/{{pr_number}}",
     backend="openvino",
     model_kwargs={{"file_name": "openvino_model_qint8_quantized.xml"}},
 )
 # 2. Inference works as normal
 embeddings = model.encode(["The weather is lovely today.", "It's so sunny outside!", "He drove to the stadium."])
 similarities = model.similarity(embeddings, embeddings)
 """
 def on_submit(
     model_id,
@@ -533,35 +811,67 @@ def on_submit(
     profile: Optional[gr.OAuthProfile] = None,
 ):
     if oauth_token is None or profile is None:
-        return "Commit or PR url:<br>...", inference_snippet, gr.Textbox("Please sign in with Hugging Face to use this Space", visible=True)
     if not model_id:
-        return "Commit or PR url:<br>...", inference_snippet, gr.Textbox("Please enter a model ID", visible=True)
     if not is_sentence_transformer_model(model_id):
-        return "Commit or PR url:<br>...", inference_snippet, gr.Textbox("The source model must have a Sentence Transformers tag", visible=True)
     if output_model_id and "/" not in output_model_id:
         output_model_id = f"{profile.name}/{output_model_id}"
     output_model_id = output_model_id if not create_pr else model_id
     try:
         if backend == Backend.ONNX.value:
-            export_to_onnx(model_id, create_pr, output_model_id, token=oauth_token.token)
         elif backend == Backend.ONNX_DYNAMIC_QUANTIZATION.value:
             export_to_onnx_dynamic_quantization(
-                model_id, create_pr, output_model_id, onnx_quantization_config, token=oauth_token.token
             )
         elif backend == Backend.ONNX_OPTIMIZATION.value:
             export_to_onnx_optimization(
-                model_id, create_pr, output_model_id, onnx_optimization_config, token=oauth_token.token
             )
         elif backend == Backend.OPENVINO.value:
-            export_to_openvino(model_id, create_pr, output_model_id, token=oauth_token.token)
         elif backend == Backend.OPENVINO_STATIC_QUANTIZATION.value:
             export_to_openvino_static_quantization(
                 model_id,
                 create_pr,
                 output_model_id,
                 ov_quant_dataset_name,
@@ -572,19 +882,32 @@ def on_submit(
                 token=oauth_token.token,
             )
     except FileExistsError as exc:
-        return "Commit or PR url:<br>...", inference_snippet, gr.Textbox(str(exc), visible=True)
     if create_pr:
         url, num = get_last_pr(output_model_id)
-        return f"PR url:<br>{url}", inference_snippet.replace("pr_number = 2", f"pr_number = {num}"), gr.Textbox(visible=False)
     # Remove the lines that refer to the revision argument
     lines = inference_snippet.splitlines()
     del lines[7]
     del lines[4]
     del lines[3]
     inference_snippet = "\n".join(lines)
-    return f"Commit url:<br>{get_last_commit(output_model_id)}", inference_snippet, gr.Textbox(visible=False)
 def on_change(
     model_id,
@@ -602,31 +925,44 @@ def on_change(
     profile: Optional[gr.OAuthProfile] = None,
 ) -> str:
     if oauth_token is None or profile is None:
-        return "", "", "", gr.Textbox("Please sign in with Hugging Face to use this Space", visible=True)
     if not model_id:
         return "", "", "", gr.Textbox("Please enter a model ID", visible=True)
     if output_model_id and "/" not in output_model_id:
         output_model_id = f"{profile.username}/{output_model_id}"
     output_model_id = output_model_id if not create_pr else model_id
     if backend == Backend.ONNX.value:
-        snippets = export_to_onnx_snippet(model_id, create_pr, output_model_id)
     elif backend == Backend.ONNX_DYNAMIC_QUANTIZATION.value:
         snippets = export_to_onnx_dynamic_quantization_snippet(
-            model_id, create_pr, output_model_id, onnx_quantization_config
         )
     elif backend == Backend.ONNX_OPTIMIZATION.value:
         snippets = export_to_onnx_optimization_snippet(
-            model_id, create_pr, output_model_id, onnx_optimization_config
         )
     elif backend == Backend.OPENVINO.value:
-        snippets = export_to_openvino_snippet(model_id, create_pr, output_model_id)
     elif backend == Backend.OPENVINO_STATIC_QUANTIZATION.value:
         snippets = export_to_openvino_static_quantization_snippet(
             model_id,
             create_pr,
             output_model_id,
             ov_quant_dataset_name,
@@ -637,7 +973,7 @@ def on_change(
         )
     else:
         return "", "", "", gr.Textbox("Unexpected backend!", visible=True)
     return *snippets, gr.Textbox(visible=False)
@@ -664,34 +1000,75 @@ with gr.Blocks(
     with gr.Row():
         # Left Input Column
         with gr.Column(scale=2):
             gr.Markdown(
                 value="""\
-### Export a Sentence Transformer model to accelerated backends
-Sentence Transformers embedding models can be optimized for **faster inference** on CPU and GPU devices by exporting, quantizing, and optimizing them in ONNX and OpenVINO formats.
-Observe the [Speeding up Inference](https://sbert.net/docs/sentence_transformer/usage/efficiency.html) documentation for more information.
 """,
                 label="",
                 container=True,
             )
-            gr.HTML(value="""\
 <details><summary>Click to see performance benchmarks</summary>
 <table>
   <thead>
     <tr>
-      <th>GPU</th>
-      <th>CPU</th>
     </tr>
   </thead>
   <tbody>
     <tr>
       <td>
-        <img src="https://huggingface.co/spaces/tomaarsen/backend-export/resolve/main/images/backends_benchmark_gpu.png" alt="">
       </td>
       <td>
-        <img src="https://huggingface.co/spaces/tomaarsen/backend-export/resolve/main/images/backends_benchmark_cpu.png" alt="">
       </td>
     </tr>
   </tbody>
@@ -706,11 +1083,12 @@ Observe the [Speeding up Inference](https://sbert.net/docs/sentence_transformer/
 </ul>
 </details>
-""")
             model_id = HuggingfaceHubSearch(
-                label="Sentence Transformer model to export",
-                placeholder="Search for Sentence Transformer models on Hugging Face",
                 search_type="model",
             )
             create_pr = gr.Checkbox(
@@ -741,33 +1119,33 @@ Observe the [Speeding up Inference](https://sbert.net/docs/sentence_transformer/
                 gr.Markdown(
                     value="[ONNX Documentation](https://sbert.net/docs/sentence_transformer/usage/efficiency.html#onnx)",
                     container=True,
-                    elem_classes=["small-text"]
                 )
             with gr.Group(visible=False) as onnx_dynamic_quantization_group:
                 onnx_quantization_config = gr.Radio(
                     choices=["arm64", "avx2", "avx512", "avx512_vnni"],
                     value="avx512_vnni",
                     label="Quantization config",
-                    info="[ONNX Quantization Documentation](https://sbert.net/docs/sentence_transformer/usage/efficiency.html#quantizing-onnx-models)"
                 )
             with gr.Group(visible=False) as onnx_optimization_group:
                 onnx_optimization_config = gr.Radio(
                     choices=["O1", "O2", "O3", "O4"],
                     value="O4",
                     label="Optimization config",
-                    info="[ONNX Optimization Documentation](https://sbert.net/docs/sentence_transformer/usage/efficiency.html#optimizing-onnx-models)"
                 )
             with gr.Group(visible=False) as openvino_group:
                 gr.Markdown(
                     value="[OpenVINO Documentation](https://sbert.net/docs/sentence_transformer/usage/efficiency.html#openvino)",
                     container=True,
-                    elem_classes=["small-text"]
                 )
             with gr.Group(visible=False) as openvino_static_quantization_group:
                 gr.Markdown(
                     value="[OpenVINO Quantization Documentation](https://sbert.net/docs/sentence_transformer/usage/efficiency.html#quantizing-openvino-models)",
                     container=True,
-                    elem_classes=["small-text"]
                 )
                 ov_quant_dataset_name = HuggingfaceHubSearch(
                     value="nyu-mll/glue",

 from enum import Enum
+from functools import lru_cache, partial
+import json
 from pathlib import Path
 from typing import Optional, Tuple
 import gradio as gr
 from gradio_huggingfacehub_search import HuggingfaceHubSearch
 import huggingface_hub
+from sentence_transformers import CrossEncoder, SentenceTransformer, SparseEncoder
 from sentence_transformers import (
     export_dynamic_quantized_onnx_model as st_export_dynamic_quantized_onnx_model,
     export_optimized_onnx_model as st_export_optimized_onnx_model,
     export_static_quantized_openvino_model as st_export_static_quantized_openvino_model,
 )
+from huggingface_hub import (
+    model_info,
+    upload_folder,
+    get_repo_discussions,
+    list_repo_commits,
+    HfFileSystem,
+    hf_hub_download,
+)
+from huggingface_hub.errors import (
+    RepositoryNotFoundError,
+    HFValidationError,
+    EntryNotFoundError,
+)
 from optimum.intel import OVQuantizationConfig
 from tempfile import TemporaryDirectory
         return self.value
+class Archetype(Enum):
+    """
+    Model families this app distinguishes when loading a model and rendering code snippets.
+    """
+    # The first three values match the names of the classes imported from
+    # sentence_transformers, so a member can be interpolated straight into generated
+    # code such as f"from sentence_transformers import {archetype}".
+    SENTENCE_TRANSFORMER = "SentenceTransformer"
+    SPARSE_ENCODER = "SparseEncoder"
+    CROSS_ENCODER = "CrossEncoder"
+    # Fallback for models that get_archetype cannot classify.
+    OTHER = "Other"
+    def __str__(self) -> str:
+        # Return the bare value (e.g. "CrossEncoder") rather than "Archetype.CROSS_ENCODER".
+        return self.value
 backends = [str(backend) for backend in Backend]
 FILE_SYSTEM = HfFileSystem()
 def is_new_model(model_id: str) -> bool:
     """
     Check if the model ID exists on the Hugging Face Hub. If we get a request error, then we
     return "sentence-transformers" in model_info(model_id).tags
+@lru_cache()
+def get_archetype(model_id: str) -> Archetype:
+    if "/" not in model_id:
+        return Archetype.OTHER
+    try:
+        config_sentence_transformers_path = hf_hub_download(
+            model_id, filename="config_sentence_transformers.json"
+        )
+    except (RepositoryNotFoundError, HFValidationError):
+        return Archetype.OTHER
+    except EntryNotFoundError:
+        config_sentence_transformers_path = None
+    try:
+        config_path = hf_hub_download(model_id, filename="config.json")
+    except (RepositoryNotFoundError, HFValidationError):
+        return Archetype.OTHER
+    except EntryNotFoundError:
+        config_path = None
+    if config_sentence_transformers_path is None and config_path is None:
+        return Archetype.OTHER
+    if config_sentence_transformers_path is not None:
+        with open(config_sentence_transformers_path, "r", encoding="utf8") as f:
+            st_config = json.load(f)
+            model_type = st_config.get("model_type", "SentenceTransformer")
+            if model_type == "SentenceTransformer":
+                return Archetype.SENTENCE_TRANSFORMER
+            elif model_type == "SparseEncoder":
+                return Archetype.SPARSE_ENCODER
+            else:
+                return Archetype.OTHER
+    if config_path is not None:
+        with open(config_path, "r", encoding="utf8") as f:
+            config = json.load(f)
+            if "sentence_transformers" in config or config["architectures"][0].endswith(
+                "ForSequenceClassification"
+            ):
+                return Archetype.CROSS_ENCODER
+    return Archetype.OTHER
 def get_last_commit(model_id: str) -> str:
     """
     Build the Hugging Face Hub URL pointing at the last commit of the model ID.

     The commit is the first entry returned by ``list_repo_commits(model_id)``.
     """
     first_listed_commit = list_repo_commits(model_id)[0]
     commit_id = first_listed_commit.commit_id
     return f"https://huggingface.co/{model_id}/commit/{commit_id}"
 def get_last_pr(model_id: str) -> Tuple[str, int]:
     """
     Return the URL and number of the first pull request listed for the model ID.

     Args:
         model_id: Repository ID whose discussions are listed.

     Returns:
         A ``(url, num)`` tuple for the first listed discussion that is a pull request.

     Raises:
         LookupError: If the repository has no pull requests.
     """
     for discussion in get_repo_discussions(model_id):
         # Plain discussions share this listing with pull requests; skip them so a
         # regular discussion thread is never reported as the created PR.
         if discussion.is_pull_request:
             return discussion.url, discussion.num
     # Raise a descriptive error instead of leaking a bare StopIteration from next().
     raise LookupError(f"No pull request found for '{model_id}'")
     )
+def export_to_onnx(
+    model_id: str,
+    archetype: Archetype,
+    create_pr: bool,
+    output_model_id: str,
+    token: Optional[str] = None,
+) -> None:
     if does_file_glob_exist(output_model_id, "**/model.onnx"):
         raise FileExistsError("An ONNX model already exists in the repository")
+    if archetype == Archetype.SENTENCE_TRANSFORMER:
+        model = SentenceTransformer(model_id, backend="onnx")
+    elif archetype == Archetype.SPARSE_ENCODER:
+        model = SparseEncoder(model_id, backend="onnx")
+    elif archetype == Archetype.CROSS_ENCODER:
+        model = CrossEncoder(model_id, backend="onnx")
+    else:
+        return
     commit_message = "Add exported onnx model 'model.onnx'"
     if is_new_model(output_model_id):
 ## Tip:
 Consider testing this pull request before merging by loading the model from this PR with the `revision` argument:
 ```python
+from sentence_transformers import {archetype}
 # TODO: Fill in the PR number
 pr_number = 2
+model = {archetype}(
     "{output_model_id}",
     revision=f"refs/pr/{{pr_number}}",
     backend="onnx",
 )
 # Verify that everything works as expected
+{'''embeddings = model.encode(["The weather is lovely today.", "It's so sunny outside!", "He drove to the stadium."])
 print(embeddings.shape)
 similarities = model.similarity(embeddings, embeddings)
+print(similarities)''' if archetype in {Archetype.SENTENCE_TRANSFORMER, Archetype.SPARSE_ENCODER} else
+'''predictions = model.predict([
+    ["Which planet is known as the Red Planet?", "Mars, known for its reddish appearance, is often referred to as the Red Planet."],
+    ["Which planet is known as the Red Planet?", "Jupiter, the largest planet in our solar system, has a prominent red spot."],
+])
+print(predictions)'''}
 ```
 """
                 token=token,
             )
+def export_to_onnx_snippet(
+    model_id: str, archetype: Archetype, create_pr: bool, output_model_id: str
+) -> Tuple[str, str, str]:
+    if archetype == Archetype.OTHER:
+        return "", "", ""
+    return (
+        """\
 pip install sentence_transformers[onnx-gpu]
 # or
 pip install sentence_transformers[onnx]
+""",
+        f"""\
+from sentence_transformers import {archetype}
 # 1. Load the model to be exported with the ONNX backend
+model = {archetype}(
     "{model_id}",
     backend="onnx",
 )
     "{output_model_id}",
     create_pr=True,
 )'''}
+""",
+        f"""\
+from sentence_transformers import {archetype}
 # 1. Load the model from the Hugging Face Hub
 # (until merged) Use the `revision` argument to load the model from the PR
 pr_number = 2
+model = {archetype}(
     "{output_model_id}",
     revision=f"refs/pr/{{pr_number}}",
     backend="onnx",
 )
+"""
+        + (
+            """
 # 2. Inference works as normal
 embeddings = model.encode(["The weather is lovely today.", "It's so sunny outside!", "He drove to the stadium."])
 similarities = model.similarity(embeddings, embeddings)
 """
+            if archetype in {Archetype.SENTENCE_TRANSFORMER, Archetype.SPARSE_ENCODER}
+            else """
+# 2. Inference works as normal
+predictions = model.predict([
+    ["Which planet is known as the Red Planet?", "Mars, known for its reddish appearance, is often referred to as the Red Planet."],
+    ["Which planet is known as the Red Planet?", "Jupiter, the largest planet in our solar system, has a prominent red spot."],
+])
+"""
+        ),
+    )
 def export_to_onnx_dynamic_quantization(
+    model_id: str,
+    archetype: Archetype,
+    create_pr: bool,
+    output_model_id: str,
+    onnx_quantization_config: str,
+    token: Optional[str] = None,
 ) -> None:
+    if does_file_glob_exist(
+        output_model_id, f"onnx/model_qint8_{onnx_quantization_config}.onnx"
+    ):
+        raise FileExistsError(
+            "The quantized ONNX model already exists in the repository"
+        )
+    if archetype == Archetype.SENTENCE_TRANSFORMER:
+        model = SentenceTransformer(model_id, backend="onnx")
+    elif archetype == Archetype.SPARSE_ENCODER:
+        model = SparseEncoder(model_id, backend="onnx")
+    elif archetype == Archetype.CROSS_ENCODER:
+        model = CrossEncoder(model_id, backend="onnx")
+    else:
+        return
     if not create_pr and is_new_model(output_model_id):
         model.push_to_hub(repo_id=output_model_id, token=token)
         )
     except ValueError:
         # Currently, quantization with optimum has some issues if there's already an ONNX model in a subfolder
+        if archetype == Archetype.SENTENCE_TRANSFORMER:
+            model = SentenceTransformer(
+                model_id, backend="onnx", model_kwargs={"export": True}
+            )
+        elif archetype == Archetype.SPARSE_ENCODER:
+            model = SparseEncoder(
+                model_id, backend="onnx", model_kwargs={"export": True}
+            )
+        elif archetype == Archetype.CROSS_ENCODER:
+            model = CrossEncoder(
+                model_id, backend="onnx", model_kwargs={"export": True}
+            )
+        else:
+            return
         st_export_dynamic_quantized_onnx_model(
             model,
             quantization_config=onnx_quantization_config,
     finally:
         huggingface_hub.upload_folder = original_upload_folder
 def export_to_onnx_dynamic_quantization_snippet(
+    model_id: str,
+    archetype: Archetype,
+    create_pr: bool,
+    output_model_id: str,
+    onnx_quantization_config: str,
+) -> Tuple[str, str, str]:
+    if archetype == Archetype.OTHER:
+        return "", "", ""
+    return (
+        """\
 pip install sentence_transformers[onnx-gpu]
 # or
 pip install sentence_transformers[onnx]
+""",
+        f"""\
 from sentence_transformers import (
+    {archetype},
     export_dynamic_quantized_onnx_model,
 )
+# 1. Load the model to be exported with the ONNX backend
+model = {archetype}(
     "{model_id}",
     backend="onnx",
 )
     push_to_hub=True,
 {'''    create_pr=True,
 ''' if create_pr else ''})
+""",
+        f"""\
+from sentence_transformers import {archetype}
 # 1. Load the model from the Hugging Face Hub
 # (until merged) Use the `revision` argument to load the model from the PR
 pr_number = 2
+model = {archetype}(
     "{output_model_id}",
     revision=f"refs/pr/{{pr_number}}",
     backend="onnx",
     model_kwargs={{"file_name": "model_qint8_{onnx_quantization_config}.onnx"}},
 )
+"""
+        + (
+            """
 # 2. Inference works as normal
 embeddings = model.encode(["The weather is lovely today.", "It's so sunny outside!", "He drove to the stadium."])
 similarities = model.similarity(embeddings, embeddings)
 """
+            if archetype in {Archetype.SENTENCE_TRANSFORMER, Archetype.SPARSE_ENCODER}
+            else """
+# 2. Inference works as normal
+predictions = model.predict([
+    ["Which planet is known as the Red Planet?", "Mars, known for its reddish appearance, is often referred to as the Red Planet."],
+    ["Which planet is known as the Red Planet?", "Jupiter, the largest planet in our solar system, has a prominent red spot."],
+])
+"""
+        ),
+    )
+def export_to_onnx_optimization(
+    model_id: str,
+    archetype: Archetype,
+    create_pr: bool,
+    output_model_id: str,
+    onnx_optimization_config: str,
+    token: Optional[str] = None,
+) -> None:
+    if does_file_glob_exist(
+        output_model_id, f"onnx/model_{onnx_optimization_config}.onnx"
+    ):
+        raise FileExistsError(
+            "The optimized ONNX model already exists in the repository"
+        )
+    if archetype == Archetype.SENTENCE_TRANSFORMER:
+        model = SentenceTransformer(model_id, backend="onnx")
+    elif archetype == Archetype.SPARSE_ENCODER:
+        model = SparseEncoder(model_id, backend="onnx")
+    elif archetype == Archetype.CROSS_ENCODER:
+        model = CrossEncoder(model_id, backend="onnx")
+    else:
+        return
     if not create_pr and is_new_model(output_model_id):
         model.push_to_hub(repo_id=output_model_id, token=token)
     finally:
         huggingface_hub.upload_folder = original_upload_folder
+def export_to_onnx_optimization_snippet(
+    model_id: str,
+    archetype: Archetype,
+    create_pr: bool,
+    output_model_id: str,
+    onnx_optimization_config: str,
+) -> Tuple[str, str, str]:
+    if archetype == Archetype.OTHER:
+        return "", "", ""
+    return (
+        """\
 pip install sentence_transformers[onnx-gpu]
 # or
 pip install sentence_transformers[onnx]
+""",
+        f"""\
 from sentence_transformers import (
+    {archetype},
     export_optimized_onnx_model,
 )
 # 1. Load the model to be optimized with the ONNX backend
+model = {archetype}(
     "{model_id}",
     backend="onnx",
 )
     push_to_hub=True,
 {'''    create_pr=True,
 ''' if create_pr else ''})
+""",
+        f"""\
+from sentence_transformers import {archetype}
 # 1. Load the model from the Hugging Face Hub
 # (until merged) Use the `revision` argument to load the model from the PR
 pr_number = 2
+model = {archetype}(
     "{output_model_id}",
     revision=f"refs/pr/{{pr_number}}",
     backend="onnx",
     model_kwargs={{"file_name": "model_{onnx_optimization_config}.onnx"}},
 )
+"""
+        + (
+            """
 # 2. Inference works as normal
 embeddings = model.encode(["The weather is lovely today.", "It's so sunny outside!", "He drove to the stadium."])
 similarities = model.similarity(embeddings, embeddings)
 """
+            if archetype in {Archetype.SENTENCE_TRANSFORMER, Archetype.SPARSE_ENCODER}
+            else """
+# 2. Inference works as normal
+predictions = model.predict([
+    ["Which planet is known as the Red Planet?", "Mars, known for its reddish appearance, is often referred to as the Red Planet."],
+    ["Which planet is known as the Red Planet?", "Jupiter, the largest planet in our solar system, has a prominent red spot."],
+])
+"""
+        ),
+    )
+def export_to_openvino(
+    model_id: str,
+    archetype: Archetype,
+    create_pr: bool,
+    output_model_id: str,
+    token: Optional[str] = None,
+) -> None:
     if does_file_glob_exist(output_model_id, "**/openvino_model.xml"):
         raise FileExistsError("The OpenVINO model already exists in the repository")
+    if archetype == Archetype.SENTENCE_TRANSFORMER:
+        model = SentenceTransformer(model_id, backend="openvino")
+    elif archetype == Archetype.SPARSE_ENCODER:
+        model = SparseEncoder(model_id, backend="openvino")
+    elif archetype == Archetype.CROSS_ENCODER:
+        model = CrossEncoder(model_id, backend="openvino")
+    else:
+        return
     commit_message = "Add exported openvino model 'openvino_model.xml'"
 ## Tip:
 Consider testing this pull request before merging by loading the model from this PR with the `revision` argument:
 ```python
+from sentence_transformers import {archetype}
 # TODO: Fill in the PR number
 pr_number = 2
+model = {archetype}(
     "{output_model_id}",
     revision=f"refs/pr/{{pr_number}}",
     backend="openvino",
 )
 # Verify that everything works as expected
+{'''embeddings = model.encode(["The weather is lovely today.", "It's so sunny outside!", "He drove to the stadium."])
 print(embeddings.shape)
 similarities = model.similarity(embeddings, embeddings)
+print(similarities)''' if archetype in {Archetype.SENTENCE_TRANSFORMER, Archetype.SPARSE_ENCODER} else
+'''predictions = model.predict([
+    ["Which planet is known as the Red Planet?", "Mars, known for its reddish appearance, is often referred to as the Red Planet."],
+    ["Which planet is known as the Red Planet?", "Jupiter, the largest planet in our solar system, has a prominent red spot."],
+])
+print(predictions)'''}
 ```
 """
                 token=token,
             )
+def export_to_openvino_snippet(
+    model_id: str, archetype: Archetype, create_pr: bool, output_model_id: str
+) -> Tuple[str, str, str]:
+    if archetype == Archetype.OTHER:
+        return "", "", ""
+    return (
+        """\
 pip install sentence_transformers[openvino]
+""",
+        f"""\
+from sentence_transformers import {archetype}
 # 1. Load the model to be exported with the OpenVINO backend
+model = {archetype}(
     "{model_id}",
     backend="openvino",
 )
     "{output_model_id}",
     create_pr=True,
 )'''}
+""",
+        f"""\
+from sentence_transformers import {archetype}
 # 1. Load the model from the Hugging Face Hub
 # (until merged) Use the `revision` argument to load the model from the PR
 pr_number = 2
+model = {archetype}(
     "{output_model_id}",
     revision=f"refs/pr/{{pr_number}}",
     backend="openvino",
 )
+"""
+        + (
+            """
 # 2. Inference works as normal
 embeddings = model.encode(["The weather is lovely today.", "It's so sunny outside!", "He drove to the stadium."])
 similarities = model.similarity(embeddings, embeddings)
 """
+            if archetype in {Archetype.SENTENCE_TRANSFORMER, Archetype.SPARSE_ENCODER}
+            else """
+# 2. Inference works as normal
+predictions = model.predict([
+    ["Which planet is known as the Red Planet?", "Mars, known for its reddish appearance, is often referred to as the Red Planet."],
+    ["Which planet is known as the Red Planet?", "Jupiter, the largest planet in our solar system, has a prominent red spot."],
+])
+"""
+        ),
+    )
 def export_to_openvino_static_quantization(
     model_id: str,
+    archetype: Archetype,
     create_pr: bool,
     output_model_id: str,
     ov_quant_dataset_name: str,
     ov_quant_dataset_num_samples: int,
     token: Optional[str] = None,
 ) -> None:
+    if does_file_glob_exist(
+        output_model_id, "openvino/openvino_model_qint8_quantized.xml"
+    ):
+        raise FileExistsError(
+            "The quantized OpenVINO model already exists in the repository"
+        )
+    if archetype == Archetype.SENTENCE_TRANSFORMER:
+        model = SentenceTransformer(model_id, backend="openvino")
+    elif archetype == Archetype.SPARSE_ENCODER:
+        model = SparseEncoder(model_id, backend="openvino")
+    elif archetype == Archetype.CROSS_ENCODER:
+        model = CrossEncoder(model_id, backend="openvino")
+    else:
+        return
     if not create_pr and is_new_model(output_model_id):
         model.push_to_hub(repo_id=output_model_id, token=token)
     finally:
         huggingface_hub.upload_folder = original_upload_folder
 def export_to_openvino_static_quantization_snippet(
     model_id: str,
+    archetype: Archetype,
     create_pr: bool,
     output_model_id: str,
     ov_quant_dataset_name: str,
     ov_quant_dataset_split: str,
     ov_quant_dataset_column_name: str,
     ov_quant_dataset_num_samples: int,
+) -> Tuple[str, str, str]:
+    if archetype == Archetype.OTHER:
+        return "", "", ""
+    return (
+        """\
 pip install sentence_transformers[openvino]
+""",
+        f"""\
 from sentence_transformers import (
+    {archetype},
     export_static_quantized_openvino_model,
 )
 from optimum.intel import OVQuantizationConfig
 # 1. Load the model to be quantized with the OpenVINO backend
+model = {archetype}(
     "{model_id}",
     backend="openvino",
 )
     push_to_hub=True,
 {'''    create_pr=True,
 ''' if create_pr else ''})
+""",
+        f"""\
+from sentence_transformers import {archetype}
 # 1. Load the model from the Hugging Face Hub
 # (until merged) Use the `revision` argument to load the model from the PR
 pr_number = 2
+model = {archetype}(
     "{output_model_id}",
     revision=f"refs/pr/{{pr_number}}",
     backend="openvino",
     model_kwargs={{"file_name": "openvino_model_qint8_quantized.xml"}},
 )
+"""
+        + (
+            """
 # 2. Inference works as normal
 embeddings = model.encode(["The weather is lovely today.", "It's so sunny outside!", "He drove to the stadium."])
 similarities = model.similarity(embeddings, embeddings)
 """
+            if archetype in {Archetype.SENTENCE_TRANSFORMER, Archetype.SPARSE_ENCODER}
+            else """
+# 2. Inference works as normal
+predictions = model.predict([
+    ["Which planet is known as the Red Planet?", "Mars, known for its reddish appearance, is often referred to as the Red Planet."],
+    ["Which planet is known as the Red Planet?", "Jupiter, the largest planet in our solar system, has a prominent red spot."],
+])
+"""
+        ),
+    )
 def on_submit(
     model_id,
     profile: Optional[gr.OAuthProfile] = None,
 ):
     if oauth_token is None or profile is None:
+        return (
+            "Commit or PR url:<br>...",
+            inference_snippet,
+            gr.Textbox(
+                "Please sign in with Hugging Face to use this Space", visible=True
+            ),
+        )
     if not model_id:
+        return (
+            "Commit or PR url:<br>...",
+            inference_snippet,
+            gr.Textbox("Please enter a model ID", visible=True),
+        )
     if not is_sentence_transformer_model(model_id):
+        return (
+            "Commit or PR url:<br>...",
+            inference_snippet,
+            gr.Textbox(
+                "The source model must have a Sentence Transformers tag", visible=True
+            ),
+        )
     if output_model_id and "/" not in output_model_id:
         # Prefix with the account handle (`username`), not the display name
         # (`name`): only the handle is a valid Hub namespace, and this keeps
         # the export target identical to the one previewed by `on_change`.
         output_model_id = f"{profile.username}/{output_model_id}"
     output_model_id = output_model_id if not create_pr else model_id
+    archetype = get_archetype(model_id)
     try:
         if backend == Backend.ONNX.value:
+            export_to_onnx(
+                model_id, archetype, create_pr, output_model_id, token=oauth_token.token
+            )
         elif backend == Backend.ONNX_DYNAMIC_QUANTIZATION.value:
             export_to_onnx_dynamic_quantization(
+                model_id,
+                archetype,
+                create_pr,
+                output_model_id,
+                onnx_quantization_config,
+                token=oauth_token.token,
             )
         elif backend == Backend.ONNX_OPTIMIZATION.value:
             export_to_onnx_optimization(
+                model_id,
+                archetype,
+                create_pr,
+                output_model_id,
+                onnx_optimization_config,
+                token=oauth_token.token,
             )
         elif backend == Backend.OPENVINO.value:
+            export_to_openvino(
+                model_id, archetype, create_pr, output_model_id, token=oauth_token.token
+            )
         elif backend == Backend.OPENVINO_STATIC_QUANTIZATION.value:
             export_to_openvino_static_quantization(
                 model_id,
+                archetype,
                 create_pr,
                 output_model_id,
                 ov_quant_dataset_name,
                 token=oauth_token.token,
             )
     except FileExistsError as exc:
+        return (
+            "Commit or PR url:<br>...",
+            inference_snippet,
+            gr.Textbox(str(exc), visible=True),
+        )
     if create_pr:
         url, num = get_last_pr(output_model_id)
+        return (
+            f"PR url:<br>{url}",
+            inference_snippet.replace("pr_number = 2", f"pr_number = {num}"),
+            gr.Textbox(visible=False),
+        )
     # Remove the lines that refer to the revision argument
     lines = inference_snippet.splitlines()
     del lines[7]
     del lines[4]
     del lines[3]
     inference_snippet = "\n".join(lines)
+    return (
+        f"Commit url:<br>{get_last_commit(output_model_id)}",
+        inference_snippet,
+        gr.Textbox(visible=False),
+    )
 def on_change(
     model_id,
     profile: Optional[gr.OAuthProfile] = None,
 ) -> str:
     if oauth_token is None or profile is None:
+        return (
+            "",
+            "",
+            "",
+            gr.Textbox(
+                "Please sign in with Hugging Face to use this Space", visible=True
+            ),
+        )
     if not model_id:
         return "", "", "", gr.Textbox("Please enter a model ID", visible=True)
     if output_model_id and "/" not in output_model_id:
         output_model_id = f"{profile.username}/{output_model_id}"
     output_model_id = output_model_id if not create_pr else model_id
+    archetype = get_archetype(model_id)
     if backend == Backend.ONNX.value:
+        snippets = export_to_onnx_snippet(
+            model_id, archetype, create_pr, output_model_id
+        )
     elif backend == Backend.ONNX_DYNAMIC_QUANTIZATION.value:
         snippets = export_to_onnx_dynamic_quantization_snippet(
+            model_id, archetype, create_pr, output_model_id, onnx_quantization_config
         )
     elif backend == Backend.ONNX_OPTIMIZATION.value:
         snippets = export_to_onnx_optimization_snippet(
+            model_id, archetype, create_pr, output_model_id, onnx_optimization_config
         )
     elif backend == Backend.OPENVINO.value:
+        snippets = export_to_openvino_snippet(
+            model_id, archetype, create_pr, output_model_id
+        )
     elif backend == Backend.OPENVINO_STATIC_QUANTIZATION.value:
         snippets = export_to_openvino_static_quantization_snippet(
             model_id,
+            archetype,
             create_pr,
             output_model_id,
             ov_quant_dataset_name,
         )
     else:
         return "", "", "", gr.Textbox("Unexpected backend!", visible=True)
     return *snippets, gr.Textbox(visible=False)
     with gr.Row():
         # Left Input Column
         with gr.Column(scale=2):
             gr.Markdown(
                 value="""\
+### Export a SentenceTransformer, SparseEncoder, or CrossEncoder model to accelerated backends
+Sentence Transformers models can be optimized for **faster inference** on CPU and GPU devices by exporting, quantizing, and optimizing them in ONNX and OpenVINO formats.
+See the Speeding up Inference documentation for more information:
+* [SentenceTransformer > Speeding up Inference](https://sbert.net/docs/sentence_transformer/usage/efficiency.html)
+* [SparseEncoder > Speeding up Inference](https://sbert.net/docs/sparse_encoder/usage/efficiency.html)
+* [CrossEncoder > Speeding up Inference](https://sbert.net/docs/cross_encoder/usage/efficiency.html)
 """,
                 label="",
                 container=True,
             )
+            gr.HTML(
+                value="""\
 <details><summary>Click to see performance benchmarks</summary>
 <table>
   <thead>
     <tr>
+      <th>SentenceTransformer GPU</th>
+      <th>SentenceTransformer CPU</th>
     </tr>
   </thead>
   <tbody>
     <tr>
       <td>
+        <img src="https://sbert.net/_images/backends_benchmark_gpu.png" alt="">
       </td>
       <td>
+        <img src="https://sbert.net/_images/backends_benchmark_cpu.png" alt="">
+      </td>
+    </tr>
+  </tbody>
+</table>
+<table>
+  <thead>
+    <tr>
+      <th>SparseEncoder GPU</th>
+      <th>SparseEncoder CPU</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <td>
+        <img src="https://sbert.net/_images/se_backends_benchmark_gpu.png" alt="">
+      </td>
+      <td>
+        <img src="https://sbert.net/_images/se_backends_benchmark_cpu.png" alt="">
+      </td>
+    </tr>
+  </tbody>
+</table>
+<table>
+  <thead>
+    <tr>
+      <th>CrossEncoder GPU</th>
+      <th>CrossEncoder CPU</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <td>
+        <img src="https://sbert.net/_images/ce_backends_benchmark_gpu.png" alt="">
+      </td>
+      <td>
+        <img src="https://sbert.net/_images/ce_backends_benchmark_cpu.png" alt="">
       </td>
     </tr>
   </tbody>
 </ul>
 </details>
+"""
+            )
             model_id = HuggingfaceHubSearch(
+                label="SentenceTransformer, SparseEncoder, or CrossEncoder model to export",
+                placeholder="Search for SentenceTransformer, SparseEncoder, or CrossEncoder models on Hugging Face",
                 search_type="model",
             )
             create_pr = gr.Checkbox(
                 gr.Markdown(
                     value="[ONNX Documentation](https://sbert.net/docs/sentence_transformer/usage/efficiency.html#onnx)",
                     container=True,
+                    elem_classes=["small-text"],
                 )
             with gr.Group(visible=False) as onnx_dynamic_quantization_group:
                 onnx_quantization_config = gr.Radio(
                     choices=["arm64", "avx2", "avx512", "avx512_vnni"],
                     value="avx512_vnni",
                     label="Quantization config",
+                    info="[ONNX Quantization Documentation](https://sbert.net/docs/sentence_transformer/usage/efficiency.html#quantizing-onnx-models)",
                 )
             with gr.Group(visible=False) as onnx_optimization_group:
                 onnx_optimization_config = gr.Radio(
                     choices=["O1", "O2", "O3", "O4"],
                     value="O4",
                     label="Optimization config",
+                    info="[ONNX Optimization Documentation](https://sbert.net/docs/sentence_transformer/usage/efficiency.html#optimizing-onnx-models)",
                 )
             with gr.Group(visible=False) as openvino_group:
                 gr.Markdown(
                     value="[OpenVINO Documentation](https://sbert.net/docs/sentence_transformer/usage/efficiency.html#openvino)",
                     container=True,
+                    elem_classes=["small-text"],
                 )
             with gr.Group(visible=False) as openvino_static_quantization_group:
                 gr.Markdown(
                     value="[OpenVINO Quantization Documentation](https://sbert.net/docs/sentence_transformer/usage/efficiency.html#quantizing-openvino-models)",
                     container=True,
+                    elem_classes=["small-text"],
                 )
                 ov_quant_dataset_name = HuggingfaceHubSearch(
                     value="nyu-mll/glue",

images/backends_benchmark_cpu.png DELETED Viewed

Binary file (63.2 kB)

images/backends_benchmark_gpu.png DELETED Viewed

Binary file (59.9 kB)

requirements.txt CHANGED Viewed

@@ -1,5 +1,5 @@
-sentence_transformers[onnx-gpu,openvino]==3.3.0
 onnx==1.16.1
 https://huggingface.co/spaces/CISCai/chat-template-editor/resolve/08c8e90c53677ae70c66b3d90bf4e63a173b5505/gradio_huggingfacehub_search-0.0.8-py3-none-any.whl
-gradio[oauth]==5.5.0
-huggingface_hub==0.26.2

+sentence_transformers[onnx-gpu,openvino]==5.1.0
 onnx==1.16.1
 https://huggingface.co/spaces/CISCai/chat-template-editor/resolve/08c8e90c53677ae70c66b3d90bf4e63a173b5505/gradio_huggingfacehub_search-0.0.8-py3-none-any.whl
+gradio[oauth]==5.42.0
+huggingface_hub==0.34.4