{
  "adam_beta1": 0.9,
  "adam_beta2": 0.999,
  "adam_weight_decay": 0.01,
  "amp": false,
  "class_conditional": false,
  "class_unconditional_prob": 0.1,
  "clip_grad_norm": 1.0,
  "dataset_name": "roc",
  "dim_ae": 64,
  "disable_dropout": false,
  "dropout": 0.1,
  "ema_decay": 0.9999,
  "ema_update_every": 1,
  "enc_dec_model": "facebook/bart-base",
  "eval": false,
  "eval_batch_size": 32,
  "eval_every": 1000,
  "eval_test": false,
  "gradient_accumulation_steps": 1,
  "init_path": null,
  "l2_normalize_latents": true,
  "latent_dim": 64,
  "latent_model_path": "saved_latent_models/roc/2024-11-24_09-55-03",
  "learning_rate": 0.0001,
  "lm_mode": "freeze",
  "loss_type": "l2",
  "lr_schedule": "linear",
  "lr_warmup_steps": 1000,
  "max_seq_len": 64,
  "mixed_precision": "no",
  "normalize_latent": false,
  "num_decoder_latents": 32,
  "num_dense_connections": 3,
  "num_devices": 1,
  "num_encoder_latents": 32,
  "num_layers": 3,
  "num_samples": 1000,
  "num_train_steps": 50000,
  "objective": "pred_v",
  "optimizer": "adamw",
  "output_dir": "saved_latent_models/roc/2024-11-24_09-55-03",
  "resume_dir": null,
  "resume_training": false,
  "sampler": "ddpm",
  "sampling_schedule": null,
  "sampling_timesteps": 250,
  "save_and_sample_every": 5000,
  "save_dir": "saved_latent_models",
  "scale": 1.0,
  "scale_shift": true,
  "self_condition": true,
  "seq2seq_candidates": 5,
  "seq2seq_unconditional_prob": 0.1,
  "train_batch_size": 256,
  "train_prob_self_cond": 0.5,
  "train_schedule": "cosine",
  "trainable_params": 187928960,
  "tx_depth": 12,
  "tx_dim": 768,
  "wandb_name": "bart-roc-l2norm-test-32-64"
}