WishArdently's picture
Upload InternVideo2Stage2VideoEncoder
edf2ce7 verified
{
"architectures": [
"InternVideo2Stage2VideoEncoder"
],
"auto_map": {
"AutoConfig": "config.InternVideo2Config",
"AutoModel": "model.InternVideo2Stage2VideoEncoder"
},
"auto_resume": false,
"batch_size": 64,
"batch_size_test": 4,
"best_key": [
"msrvtt_1k_test_match",
"t2v_r1"
],
"compile_model": false,
"criterion": {
"clip_loss_ratio": [
1.0,
1.0
],
"distill_final_features": true,
"loss_weight": {
"mlm": 1.0,
"mvm": 0.0,
"uta": 0.0,
"vtc": 1.0,
"vtm": 1.0
},
"mlm_masking_prob": 0.5,
"vtm_hard_neg": true
},
"debug": false,
"deep_fusion": false,
"deepspeed": {
"enable": true,
"stage": 1
},
"delete_ds_optim_states": true,
"device": "cuda",
"dist_url": "env://",
"evaluate": false,
"evaluation": {
"eval_frame_ensemble": "concat",
"eval_offload": true,
"eval_x_only": false,
"k_test": 128
},
"gradient_checkpointing": true,
"inputs": {
"batch_size": {
"image": 64,
"video": 64
},
"batch_size_test": {
"image": 4,
"video": 4
},
"image_res": 224,
"max_txt_l": {
"image": 32,
"video": 32
},
"video_input": {
"num_frames": 8,
"num_frames_test": 8,
"random_aug": false,
"sample_type": "rand",
"sample_type_test": "middle"
}
},
"jump_evaluate": false,
"log_freq": 100,
"max_txt_l": 32,
"mode": "pt",
"model": {
"embed_dim": 512,
"find_unused_parameters": false,
"model_cls": "InternVideo2_Stage2",
"multimodal": {
"enable": true
},
"temp": 0.07,
"text_encoder": "bert_large",
"vision_encoder": {
"checkpoint_num": 40,
"clip_embed_dim": 768,
"clip_input_resolution": 224,
"clip_norm_type": "l2",
"clip_return_layer": 6,
"clip_student_return_interval": 1,
"clip_teacher": null,
"clip_teacher_embed_dim": 3200,
"clip_teacher_final_dim": 768,
"clip_teacher_return_interval": 1,
"d_model": 1408,
"image_mask_ratio": 0.5,
"image_mask_type": "random",
"img_size": 224,
"keep_temporal": false,
"name": "pretrain_internvideo2_1b_patch14_224",
"num_frames": 8,
"only_mask": true,
"patch_size": 14,
"pretrained": "/home/linanxi/InternVideo/checkpoints/InternVideo2-stage2_1b-224p-f4/InternVideo2-stage2_1b-224p-f4.pt",
"sep_image_video_pos_embed": true,
"tubelet_size": 1,
"use_checkpoint": false,
"use_flash_attn": true,
"use_fused_mlp": true,
"use_fused_rmsnorm": true,
"video_mask_ratio": 0.8,
"video_mask_type": "random"
}
},
"model_type": "internvideo2",
"num_frames": 8,
"num_frames_test": 8,
"num_workers": 6,
"optimizer": {
"different_lr": {
"enable": false,
"lr": 0.001,
"module_names": []
},
"lr": 5e-05,
"max_grad_norm": 3.0,
"opt": "adamW",
"opt_betas": [
0.9,
0.98
],
"weight_decay": 0.05
},
"output_dir": null,
"pretrained_path": "",
"resume": false,
"save_ckpt_iter": null,
"save_latest": true,
"scheduler": {
"epochs": 10,
"min_lr_multi": 0.01,
"sched": "cosine",
"warmup_epochs": 1
},
"seed": 42,
"test_file": {
"didemo_ret_test": "available_corpus[\"didemo_ret_test\"]",
"msrvtt_1k_test": "available_corpus[\"msrvtt_1k_test\"]"
},
"test_types": [
"msrvtt_1k_test",
"didemo_ret_test"
],
"text_enc": "bert_large",
"tokenizer": null,
"torch_dtype": "float16",
"train_file": "available_corpus[\"pretrain_example_data_1B\"]",
"transformers_version": "4.47.0",
"use_bf16": true,
"use_flash_sdp": false,
"use_half_precision": false,
"use_mem_efficient_sdp": false,
"wandb": {
"enable": false,
"entity": "opengvlab",
"project": "InternVideo2-Stage2"
}
}