IbrahimSalah committed on
Commit
89b16f6
·
verified ·
1 Parent(s): 024e0ad

Upload config.json

Browse files
Files changed (1) hide show
  1. config.json +83 -0
config.json ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "spark-tts",
3
+ "architectures": [
4
+ "SparkTTSModel"
5
+ ],
6
+ "auto_map": {
7
+ "AutoConfig": "configuration_spark_tts.SparkTTSConfig",
8
+ "AutoModel": "modeling_spark_tts.SparkTTSModel",
9
+ "AutoProcessor": "processing_spark_tts.SparkTTSProcessor"
10
+ },
11
+ "processor_class": "processing_spark_tts.SparkTTSProcessor",
12
+ "llm_model_name_or_path": "./LLM",
13
+ "bicodec_model_name_or_path": "./BiCodec",
14
+ "wav2vec2_model_name_or_path": "./wav2vec2-large-xlsr-53",
15
+ "sample_rate": 16000,
16
+ "highpass_cutoff_freq": 40,
17
+ "latent_hop_length": 320,
18
+ "ref_segment_duration": 6.0,
19
+ "volume_normalize": true,
20
+ "torch_dtype": "bfloat16",
21
+ "transformers_version": "4.50.3",
22
+ "_commit_hash": null,
23
+ "bicodec_config": {
24
+ "mel_params": {
25
+ "sample_rate": 16000,
26
+ "n_fft": 1024,
27
+ "win_length": 640,
28
+ "hop_length": 320,
29
+ "mel_fmin": 10,
30
+ "mel_fmax": null,
31
+ "num_mels": 128
32
+ },
33
+ "encoder_config": {
34
+ "input_channels": 1024,
35
+ "vocos_dim": 384,
36
+ "vocos_intermediate_dim": 2048,
37
+ "vocos_num_layers": 12,
38
+ "out_channels": 1024,
39
+ "sample_ratios": [1, 1]
40
+ },
41
+ "decoder_config": {
42
+ "input_channel": 1024,
43
+ "channels": 1536,
44
+ "rates": [8, 5, 4, 2],
45
+ "kernel_sizes": [16, 11, 8, 4]
46
+ },
47
+ "quantizer_config": {
48
+ "input_dim": 1024,
49
+ "codebook_size": 8192,
50
+ "codebook_dim": 8,
51
+ "commitment": 0.25,
52
+ "codebook_loss_weight": 2.0,
53
+ "decay": 0.99,
54
+ "threshold_ema_dead_code": 0.2
55
+ },
56
+ "speaker_encoder_config": {
57
+ "input_dim": 128,
58
+ "out_dim": 1024,
59
+ "latent_dim": 128,
60
+ "token_num": 32,
61
+ "fsq_levels": [4, 4, 4, 4, 4, 4],
62
+ "fsq_num_quantizers": 1
63
+ },
64
+ "prenet_config": {
65
+ "input_channels": 1024,
66
+ "vocos_dim": 384,
67
+ "vocos_intermediate_dim": 2048,
68
+ "vocos_num_layers": 12,
69
+ "out_channels": 1024,
70
+ "condition_dim": 1024,
71
+ "sample_ratios": [1, 1],
72
+ "use_tanh_at_final": false
73
+ },
74
+ "postnet_config": {
75
+ "input_channels": 1024,
76
+ "vocos_dim": 384,
77
+ "vocos_intermediate_dim": 2048,
78
+ "vocos_num_layers": 6,
79
+ "out_channels": 1024,
80
+ "use_tanh_at_final": false
81
+ }
82
+ }
83
+ }