| { | |
| "run_id": "0903_libero_goal_augsteps_0_wo_flash_attention_wo_augsteps_two_view_action_chunk_16_pretrained_vlm", | |
| "run_root_dir": "./playground/Checkpoints", | |
| "seed": 42, | |
| "trackers": [ | |
| "jsonl", | |
| "wandb" | |
| ], | |
| "wandb_entity": "michaelyu-1101-fudanuniversity", | |
| "wandb_project": "Internvla", | |
| "is_debug": false, | |
| "framework": { | |
| "framework_py": "DinoQFormerACT", | |
| "qwenvl": { | |
| "base_vlm": "/mnt/phwfile/efm_t/zhuyangkun_tmp_need_del/exp/exp_08_09/manip_sys2_qwen25_3b_onevision_molmo_a0all_refsp20/checkpoint-20000", | |
| "attn_implementation": "flash_attention_2", | |
| "vl_hidden_dim": 2048 | |
| }, | |
| "dino": { | |
| "dino_backbone": "dinov2_vitl14" | |
| }, | |
| "layer_qformer": { | |
| "qformer_end_layer": 37, | |
| "qformer_start_layer": 36, | |
| "num_query_tokens": 64, | |
| "grad_scale": 0.5 | |
| }, | |
| "action_model": { | |
| "action_model_type": "DiT-B", | |
| "action_hidden_dim": 768, | |
| "action_dim": 7, | |
| "use_ema": false, | |
| "future_action_window_size": 7, | |
| "past_action_window_size": 0, | |
| "repeated_diffusion_steps": 8 | |
| }, | |
| "reduce_in_full_precision": true | |
| }, | |
| "datasets": { | |
| "vlm_data": { | |
| "dataformat": "llava_json", | |
| "dataset_use": "asv2_conversation_en,asv2_detailed_description_en,asv2_region_captioning_en,coco_internvl_longcap_en,coco_karpathy_train_567_en,coco_negative_gpt4o_en,coco_poetry_zh,coco_rem_en_zh,cocorem_exist_yorn_en,cocotextv2_en,cocotextv2_gpt4o_en,okvqa_en,refcoco_grounding_aug_en,refcoco_grounding_en,tallyqa_coco_en,toloka_grounding_aug_en,vqav2_en,vsr_en", | |
| "eval_dataset": "aokvqa_cauldron_llava_format", | |
| "data_flatten": false, | |
| "base_interval": 2, | |
| "max_pixels": 50176, | |
| "min_pixels": 784, | |
| "fix_image_size": [ | |
| 224, | |
| 224 | |
| ], | |
| "model_max_length": 1024, | |
| "model_type": "qwen2.5vl", | |
| "per_device_batch_size": 4 | |
| }, | |
| "vla_data": { | |
| "dataset_py": "lerobot_libero", | |
| "data_root_dir": "playground/Datasets/LEROBOT_LIBERO_DATA", | |
| "data_mix": "libero_goal", | |
| "action_type": "delta_qpos", | |
| "CoT_prompt": "Your task is {instruction}. To identify the key objects for your task. Locate their bounding boxes in [x1,y1,x2,y2] format.", | |
| "CoT_answer": "bbox", | |
| "default_image_resolution": [ | |
| 3, | |
| 224, | |
| 224 | |
| ], | |
| "per_device_batch_size": 16, | |
| "load_all_data_for_training": true, | |
| "obs": [ | |
| "image_0" | |
| ] | |
| } | |
| }, | |
| "trainer": { | |
| "epochs": 100, | |
| "max_train_steps": 100000, | |
| "num_warmup_steps": 5000, | |
| "save_interval": 10000, | |
| "eval_interval": 1000, | |
| "learning_rate": { | |
| "base": 2.5e-05 | |
| }, | |
| "lr_scheduler_type": "cosine_with_min_lr", | |
| "scheduler_specific_kwargs": { | |
| "min_lr": 1e-06 | |
| }, | |
| "freeze_modules": "", | |
| "loss_scale": { | |
| "vla": 1.0, | |
| "vlm": 0.1 | |
| }, | |
| "max_grad_norm": 1.0, | |
| "warmup_ratio": 0.1, | |
| "weight_decay": 0.0, | |
| "logging_frequency": 10, | |
| "gradient_clipping": 1.0, | |
| "gradient_accumulation_steps": 1, | |
| "optimizer": { | |
| "name": "AdamW", | |
| "betas": [ | |
| 0.9, | |
| 0.95 | |
| ], | |
| "eps": 1e-08, | |
| "weight_decay": 1e-08 | |
| }, | |
| "is_resume": false, | |
| "resume_epoch": null, | |
| "resume_step": null, | |
| "enable_gradient_checkpointing": true, | |
| "enable_mixed_precision_training": true | |
| }, | |
| "output_dir": "./playground/Checkpoints/0903_libero_goal_augsteps_0_wo_flash_attention_wo_augsteps_two_view_action_chunk_16_pretrained_vlm" | |
| } |