{
  "architectures": [
    "InternVideo2Stage2VideoEncoder"
  ],
  "auto_map": {
    "AutoConfig": "config.InternVideo2Config",
    "AutoModel": "model.InternVideo2Stage2VideoEncoder"
  },
  "auto_resume": false,
  "batch_size": 64,
  "batch_size_test": 4,
  "best_key": [
    "msrvtt_1k_test_match",
    "t2v_r1"
  ],
  "compile_model": false,
  "criterion": {
    "clip_loss_ratio": [
      1.0,
      1.0
    ],
    "distill_final_features": true,
    "loss_weight": {
      "mlm": 1.0,
      "mvm": 0.0,
      "uta": 0.0,
      "vtc": 1.0,
      "vtm": 1.0
    },
    "mlm_masking_prob": 0.5,
    "vtm_hard_neg": true
  },
  "debug": false,
  "deep_fusion": false,
  "deepspeed": {
    "enable": true,
    "stage": 1
  },
  "delete_ds_optim_states": true,
  "device": "cuda",
  "dist_url": "env://",
  "evaluate": false,
  "evaluation": {
    "eval_frame_ensemble": "concat",
    "eval_offload": true,
    "eval_x_only": false,
    "k_test": 128
  },
  "gradient_checkpointing": true,
  "inputs": {
    "batch_size": {
      "image": 64,
      "video": 64
    },
    "batch_size_test": {
      "image": 4,
      "video": 4
    },
    "image_res": 224,
    "max_txt_l": {
      "image": 32,
      "video": 32
    },
    "video_input": {
      "num_frames": 8,
      "num_frames_test": 8,
      "random_aug": false,
      "sample_type": "rand",
      "sample_type_test": "middle"
    }
  },
  "jump_evaluate": false,
  "log_freq": 100,
  "max_txt_l": 32,
  "mode": "pt",
  "model": {
    "embed_dim": 512,
    "find_unused_parameters": false,
    "model_cls": "InternVideo2_Stage2",
    "multimodal": {
      "enable": true
    },
    "temp": 0.07,
    "text_encoder": "bert_large",
    "vision_encoder": {
      "checkpoint_num": 40,
      "clip_embed_dim": 768,
      "clip_input_resolution": 224,
      "clip_norm_type": "l2",
      "clip_return_layer": 6,
      "clip_student_return_interval": 1,
      "clip_teacher": null,
      "clip_teacher_embed_dim": 3200,
      "clip_teacher_final_dim": 768,
      "clip_teacher_return_interval": 1,
      "d_model": 1408,
      "image_mask_ratio": 0.5,
      "image_mask_type": "random",
      "img_size": 224,
      "keep_temporal": false,
      "name": "pretrain_internvideo2_1b_patch14_224",
      "num_frames": 8,
      "only_mask": true,
      "patch_size": 14,
      "pretrained": "/home/linanxi/InternVideo/checkpoints/InternVideo2-stage2_1b-224p-f4/InternVideo2-stage2_1b-224p-f4.pt",
      "sep_image_video_pos_embed": true,
      "tubelet_size": 1,
      "use_checkpoint": false,
      "use_flash_attn": true,
      "use_fused_mlp": true,
      "use_fused_rmsnorm": true,
      "video_mask_ratio": 0.8,
      "video_mask_type": "random"
    }
  },
  "model_type": "internvideo2",
  "num_frames": 8,
  "num_frames_test": 8,
  "num_workers": 6,
  "optimizer": {
    "different_lr": {
      "enable": false,
      "lr": 0.001,
      "module_names": []
    },
    "lr": 5e-05,
    "max_grad_norm": 3.0,
    "opt": "adamW",
    "opt_betas": [
      0.9,
      0.98
    ],
    "weight_decay": 0.05
  },
  "output_dir": null,
  "pretrained_path": "",
  "resume": false,
  "save_ckpt_iter": null,
  "save_latest": true,
  "scheduler": {
    "epochs": 10,
    "min_lr_multi": 0.01,
    "sched": "cosine",
    "warmup_epochs": 1
  },
  "seed": 42,
  "test_file": {
    "didemo_ret_test": "available_corpus[\"didemo_ret_test\"]",
    "msrvtt_1k_test": "available_corpus[\"msrvtt_1k_test\"]"
  },
  "test_types": [
    "msrvtt_1k_test",
    "didemo_ret_test"
  ],
  "text_enc": "bert_large",
  "tokenizer": null,
  "torch_dtype": "float16",
  "train_file": "available_corpus[\"pretrain_example_data_1B\"]",
  "transformers_version": "4.47.0",
  "use_bf16": true,
  "use_flash_sdp": false,
  "use_half_precision": false,
  "use_mem_efficient_sdp": false,
  "wandb": {
    "enable": false,
    "entity": "opengvlab",
    "project": "InternVideo2-Stage2"
  }
}