Commit
·
391404d
1
Parent(s):
049a0c7
Update README.md
Browse files
README.md
CHANGED
|
@@ -42,18 +42,18 @@ The details of three subnets are:
|
|
| 42 |
## Compute your speaker embeddings
|
| 43 |
|
| 44 |
```python
|
| 45 |
-
import
|
| 46 |
from sugar.models import WrappedModel
|
| 47 |
-
|
| 48 |
-
signal, fs =torchaudio.load(wav_file)
|
| 49 |
|
| 50 |
repo_id = "mechanicalsea/efficient-tdnn"
|
| 51 |
supernet_filename = "depth/depth.torchparams"
|
| 52 |
subnet_filename = "depth/depth.ecapa-tdnn.3.512.512.512.512.5.3.3.3.1536.bn.tar"
|
| 53 |
-
subnet, info = WrappedModel.from_pretrained(
|
| 54 |
-
|
|
|
|
| 55 |
|
| 56 |
-
embedding = subnet(
|
| 57 |
```
|
| 58 |
|
| 59 |
## Inference on GPU
|
|
@@ -112,14 +112,13 @@ More details about EfficientTDNN can be found in the paper [EfficientTDNN](https:
|
|
| 112 |
Please cite EfficientTDNN if you use it in your research or business.
|
| 113 |
|
| 114 |
```bibtex
|
| 115 |
-
@article{
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
year={
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
}
|
| 125 |
```
|
|
|
|
| 42 |
## Compute your speaker embeddings
|
| 43 |
|
| 44 |
```python
|
| 45 |
+
import torch
|
| 46 |
from sugar.models import WrappedModel
|
| 47 |
+
wav_input_16khz = torch.randn(1,10000).cuda()
|
|
|
|
| 48 |
|
| 49 |
repo_id = "mechanicalsea/efficient-tdnn"
|
| 50 |
supernet_filename = "depth/depth.torchparams"
|
| 51 |
subnet_filename = "depth/depth.ecapa-tdnn.3.512.512.512.512.5.3.3.3.1536.bn.tar"
|
| 52 |
+
subnet, info = WrappedModel.from_pretrained(repo_id=repo_id, supernet_filename=supernet_filename, subnet_filename=subnet_filename)
|
| 53 |
+
subnet = subnet.cuda()
|
| 54 |
+
subnet = subnet.eval()
|
| 55 |
|
| 56 |
+
embedding = subnet(wav_input_16khz)
|
| 57 |
```
|
| 58 |
|
| 59 |
## Inference on GPU
|
|
|
|
| 112 |
Please cite EfficientTDNN if you use it in your research or business.
|
| 113 |
|
| 114 |
```bibtex
|
| 115 |
+
@article{wr-efficienttdnn-2022,
|
| 116 |
+
author={Wang, Rui and Wei, Zhihua and Duan, Haoran and Ji, Shouling and Long, Yang and Hong, Zhen},
|
| 117 |
+
journal={IEEE/ACM Transactions on Audio, Speech, and Language Processing},
|
| 118 |
+
title={EfficientTDNN: Efficient Architecture Search for Speaker Recognition},
|
| 119 |
+
year={2022},
|
| 120 |
+
volume={30},
|
| 121 |
+
number={},
|
| 122 |
+
pages={2267-2279},
|
| 123 |
+
doi={10.1109/TASLP.2022.3182856}}
|
|
|
|
| 124 |
```
|