{
  "audio_vae": {
    "model": {
      "params": {
        "ddconfig": {
          "double_z": true,
          "mel_bins": 64,
          "z_channels": 8,
          "resolution": 256,
          "downsample_time": false,
          "in_channels": 2,
          "out_ch": 2,
          "ch": 128,
          "ch_mult": [1, 2, 4],
          "num_res_blocks": 2,
          "attn_resolutions": [],
          "dropout": 0.0,
          "mid_block_add_attention": false,
          "norm_type": "pixel",
          "causality_axis": "height"
        },
        "sampling_rate": 16000
      }
    },
    "preprocessing": {
      "audio": {
        "sampling_rate": 16000,
        "max_wav_value": 32768.0,
        "duration": 5.12,
        "stereo": true,
        "causal_padding": 3
      },
      "stft": {
        "filter_length": 1024,
        "hop_length": 160,
        "win_length": 1024,
        "causal": true
      },
      "mel": {
        "n_mel_channels": 64,
        "mel_fmin": 0,
        "mel_fmax": 8000
      }
    }
  },
  "_class_name": "LTX2AudioDecoder"
}