PY007's picture
Upload LTX2 distilled model weights (converted to diffusers format)
7b1720d verified
{
  "vae": {
    "_class_name": "CausalVideoAutoencoder",
    "dims": 3,
    "in_channels": 3,
    "out_channels": 3,
    "latent_channels": 128,
    "encoder_blocks": [
      [
        "res_x",
        {
          "num_layers": 4
        }
      ],
      [
        "compress_space_res",
        {
          "multiplier": 2
        }
      ],
      [
        "res_x",
        {
          "num_layers": 6
        }
      ],
      [
        "compress_time_res",
        {
          "multiplier": 2
        }
      ],
      [
        "res_x",
        {
          "num_layers": 6
        }
      ],
      [
        "compress_all_res",
        {
          "multiplier": 2
        }
      ],
      [
        "res_x",
        {
          "num_layers": 2
        }
      ],
      [
        "compress_all_res",
        {
          "multiplier": 2
        }
      ],
      [
        "res_x",
        {
          "num_layers": 2
        }
      ]
    ],
    "decoder_blocks": [
      [
        "res_x",
        {
          "num_layers": 5,
          "inject_noise": false
        }
      ],
      [
        "compress_all",
        {
          "residual": true,
          "multiplier": 2
        }
      ],
      [
        "res_x",
        {
          "num_layers": 5,
          "inject_noise": false
        }
      ],
      [
        "compress_all",
        {
          "residual": true,
          "multiplier": 2
        }
      ],
      [
        "res_x",
        {
          "num_layers": 5,
          "inject_noise": false
        }
      ],
      [
        "compress_all",
        {
          "residual": true,
          "multiplier": 2
        }
      ],
      [
        "res_x",
        {
          "num_layers": 5,
          "inject_noise": false
        }
      ]
    ],
    "scaling_factor": 1.0,
    "norm_layer": "pixel_norm",
    "patch_size": 4,
    "latent_log_var": "uniform",
    "use_quant_conv": false,
    "causal_decoder": false,
    "timestep_conditioning": false,
    "normalize_latent_channels": false,
    "encoder_base_channels": 128,
    "decoder_base_channels": 128
  },
  "_class_name": "CausalVideoAutoencoder"
}