HoneyTian committed on
Commit
e7cad17
·
1 Parent(s): e027841
examples/silero_vad_by_webrtcvad/run.sh CHANGED
@@ -4,7 +4,7 @@
4
 
5
  bash run.sh --stage 3 --stop_stage 5 --system_version centos \
6
  --file_folder_name silero-vad-by-webrtcvad-nx2-dns3 \
7
- --final_model_name silero-vad-by-webrtcvad-nx2-dns3-20251119 \
8
  --noise_patterns "/data/tianxing/HuggingDatasets/nx_noise/data/noise/**/*.wav" \
9
  --speech_patterns "/data/tianxing/HuggingDatasets/nx_noise/data/speech/dns3-speech/**/*.wav \
10
  /data/tianxing/HuggingDatasets/nx_noise/data/speech/nx-speech2/**/*.wav" \
 
4
 
5
  bash run.sh --stage 3 --stop_stage 5 --system_version centos \
6
  --file_folder_name silero-vad-by-webrtcvad-nx2-dns3 \
7
+ --final_model_name silero-vad-by-webrtcvad-nx2-dns3-20251120 \
8
  --noise_patterns "/data/tianxing/HuggingDatasets/nx_noise/data/noise/**/*.wav" \
9
  --speech_patterns "/data/tianxing/HuggingDatasets/nx_noise/data/speech/dns3-speech/**/*.wav \
10
  /data/tianxing/HuggingDatasets/nx_noise/data/speech/nx-speech2/**/*.wav" \
examples/silero_vad_by_webrtcvad/step_5_export_model.py CHANGED
@@ -81,18 +81,23 @@ def main():
81
  "new_encoder_hidden_cache_list",
82
  "new_lstm_hidden_state"
83
  ],
84
- dynamic_axes={
85
- "inputs": {0: "batch_size", 2: "num_samples"},
86
- "encoder_in_cache": {1: "batch_size"},
87
- "encoder_hidden_cache_list": {1: "batch_size"},
88
- "lstm_hidden_state": {2: "batch_size"},
89
- "logits": {0: "batch_size"},
90
- "probs": {0: "batch_size"},
91
- "lsnr": {0: "batch_size"},
92
- "new_encoder_in_cache": {1: "batch_size"},
93
- "new_encoder_hidden_cache_list": {1: "batch_size"},
94
- "new_lstm_hidden_state": {2: "batch_size"},
95
- })
 
 
 
 
 
96
 
97
  ort_session = ort.InferenceSession(
98
  output_file.as_posix()
 
81
  "new_encoder_hidden_cache_list",
82
  "new_lstm_hidden_state"
83
  ],
84
+ dynamic_axes={"inputs": {2: "num_samples"}},
85
+ # UserWarning: Exporting a model to ONNX with a batch_size other than 1,
86
+ # with a variable length with LSTM can cause an error when running the ONNX model with a different batch size.
87
+ # Make sure to save the model with a batch size of 1, or define the initial states (h0/c0) as inputs of the model.
88
+ # dynamic_axes={
89
+ # "inputs": {0: "batch_size", 2: "num_samples"},
90
+ # "encoder_in_cache": {1: "batch_size"},
91
+ # "encoder_hidden_cache_list": {1: "batch_size"},
92
+ # "lstm_hidden_state": {2: "batch_size"},
93
+ # "logits": {0: "batch_size"},
94
+ # "probs": {0: "batch_size"},
95
+ # "lsnr": {0: "batch_size"},
96
+ # "new_encoder_in_cache": {1: "batch_size"},
97
+ # "new_encoder_hidden_cache_list": {1: "batch_size"},
98
+ # "new_lstm_hidden_state": {2: "batch_size"},
99
+ # },
100
+ )
101
 
102
  ort_session = ort.InferenceSession(
103
  output_file.as_posix()
examples/silero_vad_by_webrtcvad/yaml/config-256-0-20.yaml CHANGED
@@ -19,12 +19,12 @@ decoder_num_layers: 2
19
 
20
  # lsnr
21
  n_frame: 3
22
- min_local_snr_db: -5
23
  max_local_snr_db: 30
24
  norm_tau: 1.
25
 
26
  # data
27
- min_snr_db: 0
28
  max_snr_db: 20
29
 
30
  # train
 
19
 
20
  # lsnr
21
  n_frame: 3
22
+ min_local_snr_db: -15
23
  max_local_snr_db: 30
24
  norm_tau: 1.
25
 
26
  # data
27
+ min_snr_db: -10
28
  max_snr_db: 20
29
 
30
  # train