{
  "architectures": ["InternVideo2Stage2VideoEncoder"],
  "auto_map": {
    "AutoConfig": "config.InternVideo2Config",
    "AutoModel": "model.InternVideo2Stage2VideoEncoder"
  },
  "auto_resume": false,
  "batch_size": 64,
  "batch_size_test": 4,
  "best_key": ["msrvtt_1k_test_match", "t2v_r1"],
  "compile_model": false,
  "criterion": {
    "clip_loss_ratio": [1.0, 1.0],
    "distill_final_features": true,
    "loss_weight": {
      "mlm": 1.0,
      "mvm": 0.0,
      "uta": 0.0,
      "vtc": 1.0,
      "vtm": 1.0
    },
    "mlm_masking_prob": 0.5,
    "vtm_hard_neg": true
  },
  "debug": false,
  "deep_fusion": false,
  "deepspeed": {
    "enable": true,
    "stage": 1
  },
  "delete_ds_optim_states": true,
  "device": "cuda",
  "dist_url": "env://",
  "evaluate": false,
  "evaluation": {
    "eval_frame_ensemble": "concat",
    "eval_offload": true,
    "eval_x_only": false,
    "k_test": 128
  },
  "gradient_checkpointing": true,
  "inputs": {
    "batch_size": {
      "image": 64,
      "video": 64
    },
    "batch_size_test": {
      "image": 4,
      "video": 4
    },
    "image_res": 224,
    "max_txt_l": {
      "image": 32,
      "video": 32
    },
    "video_input": {
      "num_frames": 8,
      "num_frames_test": 8,
      "random_aug": false,
      "sample_type": "rand",
      "sample_type_test": "middle"
    }
  },
  "jump_evaluate": false,
  "log_freq": 100,
  "max_txt_l": 32,
  "mode": "pt",
  "model": {
    "embed_dim": 512,
    "find_unused_parameters": false,
    "model_cls": "InternVideo2_Stage2",
    "multimodal": {
      "enable": true
    },
    "temp": 0.07,
    "text_encoder": "bert_large",
    "vision_encoder": {
      "checkpoint_num": 40,
      "clip_embed_dim": 768,
      "clip_input_resolution": 224,
      "clip_norm_type": "l2",
      "clip_return_layer": 6,
      "clip_student_return_interval": 1,
      "clip_teacher": null,
      "clip_teacher_embed_dim": 3200,
      "clip_teacher_final_dim": 768,
      "clip_teacher_return_interval": 1,
      "d_model": 1408,
      "image_mask_ratio": 0.5,
      "image_mask_type": "random",
      "img_size": 224,
      "keep_temporal": false,
      "name": "pretrain_internvideo2_1b_patch14_224",
      "num_frames": 8,
      "only_mask": true,
      "patch_size": 14,
      "pretrained": "/home/linanxi/InternVideo/checkpoints/InternVideo2-stage2_1b-224p-f4/InternVideo2-stage2_1b-224p-f4.pt",
      "sep_image_video_pos_embed": true,
      "tubelet_size": 1,
      "use_checkpoint": false,
      "use_flash_attn": true,
      "use_fused_mlp": true,
      "use_fused_rmsnorm": true,
      "video_mask_ratio": 0.8,
      "video_mask_type": "random"
    }
  },
  "model_type": "internvideo2",
  "num_frames": 8,
  "num_frames_test": 8,
  "num_workers": 6,
  "optimizer": {
    "different_lr": {
      "enable": false,
      "lr": 0.001,
      "module_names": []
    },
    "lr": 5e-05,
    "max_grad_norm": 3.0,
    "opt": "adamW",
    "opt_betas": [0.9, 0.98],
    "weight_decay": 0.05
  },
  "output_dir": null,
  "pretrained_path": "",
  "resume": false,
  "save_ckpt_iter": null,
  "save_latest": true,
  "scheduler": {
    "epochs": 10,
    "min_lr_multi": 0.01,
    "sched": "cosine",
    "warmup_epochs": 1
  },
  "seed": 42,
  "test_file": {
    "didemo_ret_test": "available_corpus[\"didemo_ret_test\"]",
    "msrvtt_1k_test": "available_corpus[\"msrvtt_1k_test\"]"
  },
  "test_types": ["msrvtt_1k_test", "didemo_ret_test"],
  "text_enc": "bert_large",
  "tokenizer": null,
  "torch_dtype": "float16",
  "train_file": "available_corpus[\"pretrain_example_data_1B\"]",
  "transformers_version": "4.47.0",
  "use_bf16": true,
  "use_flash_sdp": false,
  "use_half_precision": false,
  "use_mem_efficient_sdp": false,
  "wandb": {
    "enable": false,
    "entity": "opengvlab",
    "project": "InternVideo2-Stage2"
  }
}