HoneyTian committed on
Commit
208797e
·
1 Parent(s): fe5f79c
examples/fsmn_vad_by_webrtcvad/run.sh CHANGED
@@ -7,7 +7,7 @@ bash run.sh --stage 3 --stop_stage 5 --system_version centos \
7
  --final_model_name fsmn-vad-nx2-dns3-256-128-4-20251125 \
8
  --noise_patterns "/data/tianxing/HuggingDatasets/nx_noise/data/noise/**/*.wav" \
9
  --speech_patterns "/data/tianxing/HuggingDatasets/nx_noise/data/speech/dns3-speech/**/*.wav \
10
- /data/tianxing/HuggingDatasets/nx_noise/data/speech/nx-speech2/**/*.wav"
11
  --config_file yaml/config-256-128-4.yaml
12
 
13
 
 
7
  --final_model_name fsmn-vad-nx2-dns3-256-128-4-20251125 \
8
  --noise_patterns "/data/tianxing/HuggingDatasets/nx_noise/data/noise/**/*.wav" \
9
  --speech_patterns "/data/tianxing/HuggingDatasets/nx_noise/data/speech/dns3-speech/**/*.wav \
10
+ /data/tianxing/HuggingDatasets/nx_noise/data/speech/nx-speech2/**/*.wav" \
11
  --config_file yaml/config-256-128-4.yaml
12
 
13
 
examples/fsmn_vad_by_webrtcvad/yaml/config-240-80-4.yaml CHANGED
@@ -2,7 +2,7 @@ model_name: "fsmn_vad"
2
 
3
  # spec
4
  sample_rate: 8000
5
- nfft: 512
6
  win_size: 240
7
  hop_size: 80
8
  win_type: hann
 
2
 
3
  # spec
4
  sample_rate: 8000
5
+ nfft: 256
6
  win_size: 240
7
  hop_size: 80
8
  win_type: hann
examples/fsmn_vad_by_webrtcvad/yaml/config-256-128-4.yaml CHANGED
@@ -2,7 +2,7 @@ model_name: "fsmn_vad"
2
 
3
  # spec
4
  sample_rate: 8000
5
- nfft: 512
6
  win_size: 256
7
  hop_size: 128
8
  win_type: hann
 
2
 
3
  # spec
4
  sample_rate: 8000
5
+ nfft: 256
6
  win_size: 256
7
  hop_size: 128
8
  win_type: hann
examples/fsmn_vad_by_webrtcvad/yaml/config-512-256-4.yaml ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_name: "fsmn_vad"
2
+
3
+ # spec
4
+ sample_rate: 8000
5
+ nfft: 512
6
+ win_size: 256
7
+ hop_size: 128
8
+ win_type: hann
9
+
10
+ # model
11
+ fsmn_input_size: 513
12
+ fsmn_input_affine_size: 140
13
+ fsmn_hidden_size: 250
14
+ fsmn_basic_block_layers: 4
15
+ fsmn_basic_block_hidden_size: 128
16
+ fsmn_basic_block_lorder: 20
17
+ fsmn_basic_block_rorder: 0
18
+ fsmn_basic_block_lstride: 1
19
+ fsmn_basic_block_rstride: 0
20
+ fsmn_output_affine_size: 140
21
+ fsmn_output_size: 2
22
+
23
+ # lsnr
24
+ n_frame: 3
25
+ min_local_snr_db: -15
26
+ max_local_snr_db: 30
27
+ norm_tau: 1.
28
+
29
+ # data
30
+ min_snr_db: -10
31
+ max_snr_db: 20
32
+
33
+ # train
34
+ lr: 0.001
35
+ lr_scheduler: "CosineAnnealingLR"
36
+ lr_scheduler_kwargs:
37
+   T_max: 250000
38
+   eta_min: 0.0001
39
+
40
+ max_epochs: 100
41
+ clip_grad_norm: 10.0
42
+ seed: 1234
43
+
44
+ num_workers: 4
45
+ batch_size: 128
46
+ eval_steps: 25000
examples/silero_vad_by_webrtcvad/yaml/config-512-256-4.yaml ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_name: "silero_vad"
2
+
3
+ # spec
4
+ sample_rate: 8000
5
+ nfft: 512
6
+ win_size: 512
7
+ hop_size: 256
8
+ win_type: hann
9
+
10
+ # model
11
+ encoder_in_channels: 64
12
+ encoder_hidden_channels: 128
13
+ encoder_out_channels: 128
14
+ encoder_kernel_size: 3
15
+ encoder_num_layers: 4
16
+
17
+ decoder_hidden_size: 128
18
+ decoder_num_layers: 2
19
+
20
+ # lsnr
21
+ n_frame: 3
22
+ min_local_snr_db: -15
23
+ max_local_snr_db: 30
24
+ norm_tau: 1.
25
+
26
+ # data
27
+ min_snr_db: -10
28
+ max_snr_db: 20
29
+
30
+ # train
31
+ lr: 0.001
32
+ lr_scheduler: "CosineAnnealingLR"
33
+ lr_scheduler_kwargs:
34
+   T_max: 250000
35
+   eta_min: 0.0001
36
+
37
+ max_epochs: 100
38
+ clip_grad_norm: 10.0
39
+ seed: 1234
40
+
41
+ num_workers: 4
42
+ batch_size: 128
43
+ eval_steps: 25000