{
  "vae": {
    "_class_name": "CausalVideoAutoencoder",
    "dims": 3,
    "in_channels": 3,
    "out_channels": 3,
    "latent_channels": 128,
    "encoder_blocks": [
      [
        "res_x",
        {
          "num_layers": 4
        }
      ],
      [
        "compress_space_res",
        {
          "multiplier": 2
        }
      ],
      [
        "res_x",
        {
          "num_layers": 6
        }
      ],
      [
        "compress_time_res",
        {
          "multiplier": 2
        }
      ],
      [
        "res_x",
        {
          "num_layers": 6
        }
      ],
      [
        "compress_all_res",
        {
          "multiplier": 2
        }
      ],
      [
        "res_x",
        {
          "num_layers": 2
        }
      ],
      [
        "compress_all_res",
        {
          "multiplier": 2
        }
      ],
      [
        "res_x",
        {
          "num_layers": 2
        }
      ]
    ],
    "decoder_blocks": [
      [
        "res_x",
        {
          "num_layers": 5,
          "inject_noise": false
        }
      ],
      [
        "compress_all",
        {
          "residual": true,
          "multiplier": 2
        }
      ],
      [
        "res_x",
        {
          "num_layers": 5,
          "inject_noise": false
        }
      ],
      [
        "compress_all",
        {
          "residual": true,
          "multiplier": 2
        }
      ],
      [
        "res_x",
        {
          "num_layers": 5,
          "inject_noise": false
        }
      ],
      [
        "compress_all",
        {
          "residual": true,
          "multiplier": 2
        }
      ],
      [
        "res_x",
        {
          "num_layers": 5,
          "inject_noise": false
        }
      ]
    ],
    "scaling_factor": 1.0,
    "norm_layer": "pixel_norm",
    "patch_size": 4,
    "latent_log_var": "uniform",
    "use_quant_conv": false,
    "causal_decoder": false,
    "timestep_conditioning": false,
    "normalize_latent_channels": false,
    "encoder_base_channels": 128,
    "decoder_base_channels": 128
  },
  "_class_name": "CausalVideoAutoencoder"
}