SPOC / config.json
SafeVLAEval's picture
Upload 2 files
ccfb9ac verified
{
  "visual_encoder": {
    "image_encoder": "Dinov2Small",
    "text_encoder": "t5-small",
    "fusion_xformer": {
      "num_layers": 3,
      "d_model": 512,
      "nhead": 8
    },
    "input_sensors": [
      "raw_navigation_camera",
      "raw_manipulation_camera",
      "last_actions",
      "an_object_is_in_hand"
    ],
    "bbox_encoding_type": "positional"
  },
  "visual_text_encoder_class": "TextCondMultiCameraVisualEncoder",
  "decoder": {
    "num_layers": 3,
    "d_model": 512,
    "nhead": 8
  },
  "num_actions": 20,
  "max_length": 1000,
  "action_loss": true,
  "use_llama_decoder": true,
  "_attn_implementation_autoset": false,
  "torch_dtype": "float32",
  "architectures": [
    "EarlyFusionCnnTransformer"
  ],
  "model_type": "MM",
  "transformers_version": "4.49.0"
}