PY007's picture
Upload LTX2 distilled model weights (converted to diffusers format)
3ea42d7 verified
{
"audio_vae": {
"model": {
"params": {
"ddconfig": {
"double_z": true,
"mel_bins": 64,
"z_channels": 8,
"resolution": 256,
"downsample_time": false,
"in_channels": 2,
"out_ch": 2,
"ch": 128,
"ch_mult": [
1,
2,
4
],
"num_res_blocks": 2,
"attn_resolutions": [],
"dropout": 0.0,
"mid_block_add_attention": false,
"norm_type": "pixel",
"causality_axis": "height"
},
"sampling_rate": 16000
}
},
"preprocessing": {
"audio": {
"sampling_rate": 16000,
"max_wav_value": 32768.0,
"duration": 5.12,
"stereo": true,
"causal_padding": 3
},
"stft": {
"filter_length": 1024,
"hop_length": 160,
"win_length": 1024,
"causal": true
},
"mel": {
"n_mel_channels": 64,
"mel_fmin": 0,
"mel_fmax": 8000
}
}
},
"_class_name": "LTX2AudioDecoder"
}