# VAE settings.
# NOTE(review): these lines were previously wrapped in table-cell pipes
# ("| ... | |") with all indentation stripped, which is not loadable YAML.
# The nesting here (every key placed under `vae`) is reconstructed from the
# key order -- confirm against the code that consumes this config.
vae:
  use_downsample: true
  num_latents: 256
  point_feats: 3
  out_dim: 1
  embed_dim: 64
  # width / heads = 768 / 12 = 64 (presumably the per-head size -- TODO confirm).
  width: 768
  heads: 12
  num_encoder_layers: 8
  num_decoder_layers: 16
  init_scale: 0.25
  qkv_bias: false
  use_ln_post: true
  use_udf_extraction: true
  # Seven entries, the same count as token_probability; presumably the two
  # lists are paired by index -- TODO confirm with the consumer.
  token_scales:
    - 128.0
    - 256.0
    - 384.0
    - 512.0
    - 640.0
    - 1024.0
    - 2048.0
  # Seven weights that sum to 1.0; keep that total if any entry is edited
  # (assumes the consumer treats them as a probability distribution).
  token_probability:
    - 0.025
    - 0.025
    - 0.025
    - 0.025
    - 0.05
    - 0.2
    - 0.65