HoneyTian committed on
Commit
0534cfa
·
1 Parent(s): 8a458a9
examples/fsmn_vad_by_webrtcvad/run.sh CHANGED
@@ -4,11 +4,11 @@
4
 
5
  bash run.sh --stage 3 --stop_stage 5 --system_version centos \
6
  --file_folder_name fsmn-vad-by-webrtcvad-nx2-dns3 \
7
- --final_model_name fsmn-vad-nx2-dns3-256-128-4-20251125 \
8
  --noise_patterns "/data/tianxing/HuggingDatasets/nx_noise/data/noise/**/*.wav" \
9
  --speech_patterns "/data/tianxing/HuggingDatasets/nx_noise/data/speech/dns3-speech/**/*.wav \
10
  /data/tianxing/HuggingDatasets/nx_noise/data/speech/nx-speech2/**/*.wav" \
11
- --config_file yaml/config-256-128-4.yaml
12
 
13
 
14
  END
 
4
 
5
  bash run.sh --stage 3 --stop_stage 5 --system_version centos \
6
  --file_folder_name fsmn-vad-by-webrtcvad-nx2-dns3 \
7
+ --final_model_name fsmn-vad-nx2-dns3-240-80-4-20251125 \
8
  --noise_patterns "/data/tianxing/HuggingDatasets/nx_noise/data/noise/**/*.wav" \
9
  --speech_patterns "/data/tianxing/HuggingDatasets/nx_noise/data/speech/dns3-speech/**/*.wav \
10
  /data/tianxing/HuggingDatasets/nx_noise/data/speech/nx-speech2/**/*.wav" \
11
+ --config_file yaml/config-240-80-4.yaml
12
 
13
 
14
  END
examples/silero_vad_by_webrtcvad/run.sh CHANGED
@@ -4,11 +4,11 @@
4
 
5
  bash run.sh --stage 3 --stop_stage 5 --system_version centos \
6
  --file_folder_name silero-vad-by-webrtcvad-nx2-dns3 \
7
- --final_model_name silero-vad-nx2-dns3-512-256-4-20251125 \
8
  --noise_patterns "/data/tianxing/HuggingDatasets/nx_noise/data/noise/**/*.wav" \
9
  --speech_patterns "/data/tianxing/HuggingDatasets/nx_noise/data/speech/dns3-speech/**/*.wav \
10
  /data/tianxing/HuggingDatasets/nx_noise/data/speech/nx-speech2/**/*.wav" \
11
- --config_file yaml/config-512-256-4.yaml
12
 
13
  END
14
 
 
4
 
5
  bash run.sh --stage 3 --stop_stage 5 --system_version centos \
6
  --file_folder_name silero-vad-by-webrtcvad-nx2-dns3 \
7
+ --final_model_name silero-vad-nx2-dns3-256-128-h64-e8-d2-20251202 \
8
  --noise_patterns "/data/tianxing/HuggingDatasets/nx_noise/data/noise/**/*.wav" \
9
  --speech_patterns "/data/tianxing/HuggingDatasets/nx_noise/data/speech/dns3-speech/**/*.wav \
10
  /data/tianxing/HuggingDatasets/nx_noise/data/speech/nx-speech2/**/*.wav" \
11
+ --config_file yaml/config-256-128-h64-e8-d2.yaml
12
 
13
  END
14
 
examples/silero_vad_by_webrtcvad/yaml/{config-240-80-4.yaml → config-240-80-h128-e4-d1.yaml} RENAMED
@@ -8,14 +8,14 @@ hop_size: 80
8
  win_type: hann
9
 
10
  # model
11
- encoder_in_channels: 64
12
  encoder_hidden_channels: 128
13
  encoder_out_channels: 128
14
  encoder_kernel_size: 3
15
  encoder_num_layers: 4
16
 
17
  decoder_hidden_size: 128
18
- decoder_num_layers: 2
19
 
20
  # lsnr
21
  n_frame: 3
 
8
  win_type: hann
9
 
10
  # model
11
+ encoder_in_channels: 128
12
  encoder_hidden_channels: 128
13
  encoder_out_channels: 128
14
  encoder_kernel_size: 3
15
  encoder_num_layers: 4
16
 
17
  decoder_hidden_size: 128
18
+ decoder_num_layers: 1
19
 
20
  # lsnr
21
  n_frame: 3
examples/silero_vad_by_webrtcvad/yaml/config-240-80-h64-e4-d1.yaml ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_name: "silero_vad"
2
+
3
+ # spec
4
+ sample_rate: 8000
5
+ nfft: 256
6
+ win_size: 240
7
+ hop_size: 80
8
+ win_type: hann
9
+
10
+ # model
11
+ encoder_in_channels: 64
12
+ encoder_hidden_channels: 64
13
+ encoder_out_channels: 64
14
+ encoder_kernel_size: 3
15
+ encoder_num_layers: 4
16
+
17
+ decoder_hidden_size: 64
18
+ decoder_num_layers: 1
19
+
20
+ # lsnr
21
+ n_frame: 3
22
+ min_local_snr_db: -15
23
+ max_local_snr_db: 30
24
+ norm_tau: 1.
25
+
26
+ # data
27
+ min_snr_db: -10
28
+ max_snr_db: 20
29
+
30
+ # train
31
+ lr: 0.001
32
+ lr_scheduler: "CosineAnnealingLR"
33
+ lr_scheduler_kwargs:
34
+ T_max: 250000
35
+ eta_min: 0.0001
36
+
37
+ max_epochs: 100
38
+ clip_grad_norm: 10.0
39
+ seed: 1234
40
+
41
+ num_workers: 4
42
+ batch_size: 128
43
+ eval_steps: 25000
examples/silero_vad_by_webrtcvad/yaml/{config-256-128-4.yaml → config-256-128-h128-e4-d1.yaml} RENAMED
@@ -8,14 +8,14 @@ hop_size: 128
8
  win_type: hann
9
 
10
  # model
11
- encoder_in_channels: 64
12
  encoder_hidden_channels: 128
13
  encoder_out_channels: 128
14
  encoder_kernel_size: 3
15
  encoder_num_layers: 4
16
 
17
  decoder_hidden_size: 128
18
- decoder_num_layers: 2
19
 
20
  # lsnr
21
  n_frame: 3
 
8
  win_type: hann
9
 
10
  # model
11
+ encoder_in_channels: 128
12
  encoder_hidden_channels: 128
13
  encoder_out_channels: 128
14
  encoder_kernel_size: 3
15
  encoder_num_layers: 4
16
 
17
  decoder_hidden_size: 128
18
+ decoder_num_layers: 1
19
 
20
  # lsnr
21
  n_frame: 3
examples/silero_vad_by_webrtcvad/yaml/config-256-128-h128-e8-d2.yaml ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_name: "silero_vad"
2
+
3
+ # spec
4
+ sample_rate: 8000
5
+ nfft: 256
6
+ win_size: 256
7
+ hop_size: 128
8
+ win_type: hann
9
+
10
+ # model
11
+ encoder_in_channels: 128
12
+ encoder_hidden_channels: 128
13
+ encoder_out_channels: 128
14
+ encoder_kernel_size: 3
15
+ encoder_num_layers: 8
16
+
17
+ decoder_hidden_size: 128
18
+ decoder_num_layers: 2
19
+
20
+ # lsnr
21
+ n_frame: 3
22
+ min_local_snr_db: -15
23
+ max_local_snr_db: 30
24
+ norm_tau: 1.
25
+
26
+ # data
27
+ min_snr_db: -10
28
+ max_snr_db: 20
29
+
30
+ # train
31
+ lr: 0.001
32
+ lr_scheduler: "CosineAnnealingLR"
33
+ lr_scheduler_kwargs:
34
+ T_max: 250000
35
+ eta_min: 0.0001
36
+
37
+ max_epochs: 100
38
+ clip_grad_norm: 10.0
39
+ seed: 1234
40
+
41
+ num_workers: 4
42
+ batch_size: 128
43
+ eval_steps: 25000
examples/silero_vad_by_webrtcvad/yaml/config-256-128-h64-e4-d1.yaml ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_name: "silero_vad"
2
+
3
+ # spec
4
+ sample_rate: 8000
5
+ nfft: 256
6
+ win_size: 256
7
+ hop_size: 128
8
+ win_type: hann
9
+
10
+ # model
11
+ encoder_in_channels: 64
12
+ encoder_hidden_channels: 64
13
+ encoder_out_channels: 64
14
+ encoder_kernel_size: 3
15
+ encoder_num_layers: 4
16
+
17
+ decoder_hidden_size: 64
18
+ decoder_num_layers: 1
19
+
20
+ # lsnr
21
+ n_frame: 3
22
+ min_local_snr_db: -15
23
+ max_local_snr_db: 30
24
+ norm_tau: 1.
25
+
26
+ # data
27
+ min_snr_db: -10
28
+ max_snr_db: 20
29
+
30
+ # train
31
+ lr: 0.001
32
+ lr_scheduler: "CosineAnnealingLR"
33
+ lr_scheduler_kwargs:
34
+ T_max: 250000
35
+ eta_min: 0.0001
36
+
37
+ max_epochs: 100
38
+ clip_grad_norm: 10.0
39
+ seed: 1234
40
+
41
+ num_workers: 4
42
+ batch_size: 128
43
+ eval_steps: 25000
examples/silero_vad_by_webrtcvad/yaml/config-256-128-h64-e8-d2.yaml ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_name: "silero_vad"
2
+
3
+ # spec
4
+ sample_rate: 8000
5
+ nfft: 256
6
+ win_size: 256
7
+ hop_size: 128
8
+ win_type: hann
9
+
10
+ # model
11
+ encoder_in_channels: 64
12
+ encoder_hidden_channels: 64
13
+ encoder_out_channels: 64
14
+ encoder_kernel_size: 3
15
+ encoder_num_layers: 8
16
+
17
+ decoder_hidden_size: 64
18
+ decoder_num_layers: 2
19
+
20
+ # lsnr
21
+ n_frame: 3
22
+ min_local_snr_db: -15
23
+ max_local_snr_db: 30
24
+ norm_tau: 1.
25
+
26
+ # data
27
+ min_snr_db: -10
28
+ max_snr_db: 20
29
+
30
+ # train
31
+ lr: 0.001
32
+ lr_scheduler: "CosineAnnealingLR"
33
+ lr_scheduler_kwargs:
34
+ T_max: 250000
35
+ eta_min: 0.0001
36
+
37
+ max_epochs: 100
38
+ clip_grad_norm: 10.0
39
+ seed: 1234
40
+
41
+ num_workers: 4
42
+ batch_size: 128
43
+ eval_steps: 25000
examples/silero_vad_by_webrtcvad/yaml/{config-512-256-4.yaml → config-512-256-h128-e4-d1.yaml} RENAMED
@@ -8,14 +8,14 @@ hop_size: 256
8
  win_type: hann
9
 
10
  # model
11
- encoder_in_channels: 64
12
  encoder_hidden_channels: 128
13
  encoder_out_channels: 128
14
  encoder_kernel_size: 3
15
  encoder_num_layers: 4
16
 
17
  decoder_hidden_size: 128
18
- decoder_num_layers: 2
19
 
20
  # lsnr
21
  n_frame: 3
 
8
  win_type: hann
9
 
10
  # model
11
+ encoder_in_channels: 128
12
  encoder_hidden_channels: 128
13
  encoder_out_channels: 128
14
  encoder_kernel_size: 3
15
  encoder_num_layers: 4
16
 
17
  decoder_hidden_size: 128
18
+ decoder_num_layers: 1
19
 
20
  # lsnr
21
  n_frame: 3
examples/silero_vad_by_webrtcvad/yaml/config-512-256-h256-e4-d1.yaml ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_name: "silero_vad"
2
+
3
+ # spec
4
+ sample_rate: 8000
5
+ nfft: 512
6
+ win_size: 512
7
+ hop_size: 256
8
+ win_type: hann
9
+
10
+ # model
11
+ encoder_in_channels: 256
12
+ encoder_hidden_channels: 256
13
+ encoder_out_channels: 256
14
+ encoder_kernel_size: 3
15
+ encoder_num_layers: 4
16
+
17
+ decoder_hidden_size: 256
18
+ decoder_num_layers: 1
19
+
20
+ # lsnr
21
+ n_frame: 3
22
+ min_local_snr_db: -15
23
+ max_local_snr_db: 30
24
+ norm_tau: 1.
25
+
26
+ # data
27
+ min_snr_db: -10
28
+ max_snr_db: 20
29
+
30
+ # train
31
+ lr: 0.001
32
+ lr_scheduler: "CosineAnnealingLR"
33
+ lr_scheduler_kwargs:
34
+ T_max: 250000
35
+ eta_min: 0.0001
36
+
37
+ max_epochs: 100
38
+ clip_grad_norm: 10.0
39
+ seed: 1234
40
+
41
+ num_workers: 4
42
+ batch_size: 128
43
+ eval_steps: 25000
toolbox/torchaudio/models/vad/wav2vec2_vad/__init__.py DELETED
@@ -1,6 +0,0 @@
1
- #!/usr/bin/python3
2
- # -*- coding: utf-8 -*-
3
-
4
-
5
- if __name__ == "__main__":
6
- pass
 
 
 
 
 
 
 
toolbox/torchaudio/models/vad/wav2vec2_vad/modeling_wav2vec2.py DELETED
@@ -1,6 +0,0 @@
1
- #!/usr/bin/python3
2
- # -*- coding: utf-8 -*-
3
-
4
-
5
- if __name__ == "__main__":
6
- pass