| model: | |
| names: | |
| - timm_image | |
| timm_image: | |
| checkpoint_name: resnet18 | |
| mix_choice: all_logits | |
| data_types: | |
| - image | |
| train_transforms: | |
| - resize_shorter_side | |
| - center_crop | |
| - trivial_augment | |
| val_transforms: | |
| - resize_shorter_side | |
| - center_crop | |
| image_norm: imagenet | |
| image_size: null | |
| image_chan_num: 3 | |
| use_learnable_image: false | |
| max_image_num_per_column: 1 | |
| data: | |
| image: | |
| missing_value_strategy: zero | |
| text: | |
| normalize_text: false | |
| categorical: | |
| minimum_cat_count: 100 | |
| maximum_num_cat: 20 | |
| convert_to_text: false | |
| convert_to_text_template: latex | |
| numerical: | |
| convert_to_text: false | |
| scaler_with_mean: true | |
| scaler_with_std: true | |
| document: | |
| missing_value_strategy: zero | |
| label: | |
| numerical_preprocessing: standardscaler | |
| pos_label: null | |
| column_features_pooling_mode: concat | |
| mixup: | |
| turn_on: false | |
| mixup_alpha: 0.8 | |
| cutmix_alpha: 1.0 | |
| cutmix_minmax: null | |
| prob: 1.0 | |
| switch_prob: 0.5 | |
| mode: batch | |
| turn_off_epoch: 5 | |
| label_smoothing: 0.1 | |
| modality_dropout: 0 | |
| templates: | |
| turn_on: false | |
| num_templates: 30 | |
| template_length: 2048 | |
| preset_templates: | |
| - super_glue | |
| - rte | |
| custom_templates: null | |
| optim: | |
| optim_type: adamw | |
| lr: 0.0004 | |
| weight_decay: 0.001 | |
| lr_choice: layerwise_decay | |
| lr_decay: 0.9 | |
| lr_schedule: cosine_decay | |
| max_epochs: 20 | |
| max_steps: -1 | |
| warmup_steps: 0.1 | |
| end_lr: 0 | |
| lr_mult: 1 | |
| patience: 10 | |
| val_check_interval: 0.5 | |
| check_val_every_n_epoch: 1 | |
| skip_final_val: false | |
| gradient_clip_val: 1 | |
| gradient_clip_algorithm: norm | |
| track_grad_norm: -1 | |
| log_every_n_steps: 10 | |
| label_smoothing: 0 | |
| top_k: 3 | |
| top_k_average_method: greedy_soup | |
| peft: null | |
| lora: | |
| module_filter: null | |
| filter: | |
| - query | |
| - value | |
| - ^q$ | |
| - ^v$ | |
| - ^k$ | |
| - ^o$ | |
| r: 8 | |
| alpha: 8 | |
| conv_lora_expert_num: 8 | |
| loss_func: auto | |
| focal_loss: | |
| alpha: null | |
| gamma: 2.0 | |
| reduction: mean | |
| mask2former_loss: | |
| loss_cross_entropy_weight: 10.0 | |
| loss_mask_weight: 5.0 | |
| loss_dice_weight: 5.0 | |
| extra_trainable_params: [] | |
| cross_modal_align: null | |
| cross_modal_align_weight: 0 | |
| automatic_optimization: true | |
| lemda: | |
| turn_on: false | |
| arch_type: mlp_vae | |
| z_dim: 8 | |
| num_layers: 6 | |
| kld_weight: 0.1 | |
| mse_weight: 0.1 | |
| adv_weight: 0.0001 | |
| consist_weight: 0.01 | |
| consist_threshold: 0.5 | |
| lr: 0.0001 | |
| optim_type: adamw | |
| weight_decay: 1.0e-05 | |
| env: | |
| num_gpus: 0 | |
| num_nodes: 1 | |
| batch_size: 128 | |
| per_gpu_batch_size: 8 | |
| inference_batch_size_ratio: 4 | |
| precision: 32 | |
| num_workers: 2 | |
| num_workers_inference: 2 | |
| accelerator: auto | |
| fast_dev_run: false | |
| deterministic: false | |
| auto_select_gpus: true | |
| strategy: auto | |
| deepspeed_allgather_size: 1000000000.0 | |
| deepspeed_allreduce_size: 1000000000.0 | |
| compile: | |
| turn_on: false | |
| mode: default | |
| dynamic: true | |
| backend: inductor | |