float-lab commited on
Commit
78a35b3
·
verified ·
1 Parent(s): 490c0fb

Add files using upload-large-folder tool

Browse files
Files changed (50) hide show
  1. checkpoint-10000/config.json +64 -0
  2. checkpoint-10000/experiment_cfg/metadata.json +195 -0
  3. checkpoint-10000/model-00001-of-00002.safetensors +3 -0
  4. checkpoint-10000/model-00002-of-00002.safetensors +3 -0
  5. checkpoint-10000/model.safetensors.index.json +0 -0
  6. checkpoint-10000/rng_state_0.pth +3 -0
  7. checkpoint-10000/rng_state_1.pth +3 -0
  8. checkpoint-10000/scheduler.pt +3 -0
  9. checkpoint-10000/trainer_state.json +0 -0
  10. checkpoint-6000/config.json +64 -0
  11. checkpoint-6000/experiment_cfg/metadata.json +195 -0
  12. checkpoint-6000/model-00001-of-00002.safetensors +3 -0
  13. checkpoint-6000/model-00002-of-00002.safetensors +3 -0
  14. checkpoint-6000/model.safetensors.index.json +0 -0
  15. checkpoint-6000/rng_state_0.pth +3 -0
  16. checkpoint-6000/rng_state_1.pth +3 -0
  17. checkpoint-6000/scheduler.pt +3 -0
  18. checkpoint-6000/trainer_state.json +0 -0
  19. checkpoint-7000/config.json +64 -0
  20. checkpoint-7000/experiment_cfg/metadata.json +195 -0
  21. checkpoint-7000/model-00002-of-00002.safetensors +3 -0
  22. checkpoint-7000/model.safetensors.index.json +0 -0
  23. checkpoint-7000/rng_state_0.pth +3 -0
  24. checkpoint-7000/rng_state_1.pth +3 -0
  25. checkpoint-7000/scheduler.pt +3 -0
  26. checkpoint-7000/trainer_state.json +0 -0
  27. checkpoint-8000/config.json +64 -0
  28. checkpoint-8000/experiment_cfg/metadata.json +195 -0
  29. checkpoint-8000/model-00001-of-00002.safetensors +3 -0
  30. checkpoint-8000/model-00002-of-00002.safetensors +3 -0
  31. checkpoint-8000/model.safetensors.index.json +0 -0
  32. checkpoint-8000/rng_state_0.pth +3 -0
  33. checkpoint-8000/rng_state_1.pth +3 -0
  34. checkpoint-8000/scheduler.pt +3 -0
  35. checkpoint-8000/trainer_state.json +0 -0
  36. checkpoint-9000/config.json +64 -0
  37. checkpoint-9000/experiment_cfg/metadata.json +195 -0
  38. checkpoint-9000/model-00002-of-00002.safetensors +3 -0
  39. checkpoint-9000/model.safetensors.index.json +0 -0
  40. checkpoint-9000/rng_state_0.pth +3 -0
  41. checkpoint-9000/rng_state_1.pth +3 -0
  42. checkpoint-9000/scheduler.pt +3 -0
  43. checkpoint-9000/trainer_state.json +0 -0
  44. config.json +64 -0
  45. experiment_cfg/metadata.json +195 -0
  46. model-00001-of-00002.safetensors +3 -0
  47. model-00002-of-00002.safetensors +3 -0
  48. model.safetensors.index.json +0 -0
  49. trainer_state.json +0 -0
  50. training_args.bin +3 -0
checkpoint-10000/config.json ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "action_dim": 32,
3
+ "action_head_cfg": {
4
+ "action_dim": 32,
5
+ "action_horizon": 16,
6
+ "add_pos_embed": true,
7
+ "backbone_embedding_dim": 2048,
8
+ "diffusion_model_cfg": {
9
+ "attention_head_dim": 48,
10
+ "cross_attention_dim": 2048,
11
+ "dropout": 0.2,
12
+ "final_dropout": true,
13
+ "interleave_self_attention": true,
14
+ "norm_type": "ada_norm",
15
+ "num_attention_heads": 32,
16
+ "num_layers": 16,
17
+ "output_dim": 1024,
18
+ "positional_embeddings": null
19
+ },
20
+ "hidden_size": 1024,
21
+ "input_embedding_dim": 1536,
22
+ "max_action_dim": 32,
23
+ "max_state_dim": 64,
24
+ "model_dtype": "float32",
25
+ "noise_beta_alpha": 1.5,
26
+ "noise_beta_beta": 1.0,
27
+ "noise_s": 0.999,
28
+ "num_inference_timesteps": 4,
29
+ "num_target_vision_tokens": 32,
30
+ "num_timestep_buckets": 1000,
31
+ "tune_diffusion_model": true,
32
+ "tune_projector": true,
33
+ "use_vlln": true,
34
+ "vl_self_attention_cfg": {
35
+ "attention_head_dim": 64,
36
+ "dropout": 0.2,
37
+ "final_dropout": true,
38
+ "num_attention_heads": 32,
39
+ "num_layers": 4,
40
+ "positional_embeddings": null
41
+ }
42
+ },
43
+ "action_horizon": 16,
44
+ "architectures": [
45
+ "GR00T_N1_5"
46
+ ],
47
+ "attn_implementation": null,
48
+ "backbone_cfg": {
49
+ "eagle_path": "NVEagle/eagle_er-qwen3_1_7B-Siglip2_400M_stage1_5_128gpu_er_v7_1mlp_nops",
50
+ "load_bf16": false,
51
+ "project_to_dim": null,
52
+ "reproject_vision": false,
53
+ "select_layer": 12,
54
+ "tune_llm": false,
55
+ "tune_visual": true,
56
+ "use_flash_attention": true
57
+ },
58
+ "compute_dtype": "bfloat16",
59
+ "hidden_size": 2048,
60
+ "model_dtype": "float32",
61
+ "model_type": "gr00t_n1_5",
62
+ "torch_dtype": "bfloat16",
63
+ "transformers_version": "4.51.3"
64
+ }
checkpoint-10000/experiment_cfg/metadata.json ADDED
@@ -0,0 +1,195 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "new_embodiment": {
3
+ "statistics": {
4
+ "state": {
5
+ "single_arm": {
6
+ "max": [
7
+ 0.7119408845901489,
8
+ 0.6336884498596191,
9
+ 1.6356228590011597,
10
+ 1.612607479095459,
11
+ 0.05677114799618721
12
+ ],
13
+ "min": [
14
+ -0.6996660828590393,
15
+ -1.807470679283142,
16
+ -0.421947717666626,
17
+ -1.0464303493499756,
18
+ -2.122013568878174
19
+ ],
20
+ "mean": [
21
+ 0.055539075285196304,
22
+ -0.846017599105835,
23
+ 1.0085504055023193,
24
+ 0.7250792384147644,
25
+ -1.54018235206604
26
+ ],
27
+ "std": [
28
+ 0.342788428068161,
29
+ 0.7538483142852783,
30
+ 0.4811265468597412,
31
+ 0.5023943781852722,
32
+ 0.13101021945476532
33
+ ],
34
+ "q01": [
35
+ -0.6413605809211731,
36
+ -1.778317928314209,
37
+ -0.2680672299861908,
38
+ -0.5278182625770569,
39
+ -1.7804966950416565
40
+ ],
41
+ "q99": [
42
+ 0.5124747157096863,
43
+ 0.55697101354599,
44
+ 1.5308417570590973,
45
+ 1.4034902346134186,
46
+ -1.2842555046081543
47
+ ]
48
+ },
49
+ "gripper": {
50
+ "max": [
51
+ 1.11394202709198
52
+ ],
53
+ "min": [
54
+ 0.0
55
+ ],
56
+ "mean": [
57
+ 0.2416374385356903
58
+ ],
59
+ "std": [
60
+ 0.2475089430809021
61
+ ],
62
+ "q01": [
63
+ 0.006137421354651451
64
+ ],
65
+ "q99": [
66
+ 0.9217026126384735
67
+ ]
68
+ }
69
+ },
70
+ "action": {
71
+ "single_arm": {
72
+ "max": [
73
+ 0.741093635559082,
74
+ 0.5953298807144165,
75
+ 1.6279510259628296,
76
+ 1.6003326177597046,
77
+ 0.06290856748819351
78
+ ],
79
+ "min": [
80
+ -0.741093635559082,
81
+ -1.8780510425567627,
82
+ -0.5554366707801819,
83
+ -1.1032015085220337,
84
+ -2.12508225440979
85
+ ],
86
+ "mean": [
87
+ 0.06122366711497307,
88
+ -0.9115849137306213,
89
+ 0.9640885591506958,
90
+ 0.7144355177879333,
91
+ -1.5393561124801636
92
+ ],
93
+ "std": [
94
+ 0.35077351331710815,
95
+ 0.7666987180709839,
96
+ 0.5521473288536072,
97
+ 0.5145319104194641,
98
+ 0.13183332979679108
99
+ ],
100
+ "q01": [
101
+ -0.6347781735658645,
102
+ -1.874982237815857,
103
+ -0.4076475277543068,
104
+ -0.5703352582454682,
105
+ -1.779852271080017
106
+ ],
107
+ "q99": [
108
+ 0.5339556932449341,
109
+ 0.5104953962564469,
110
+ 1.6049357652664185,
111
+ 1.412895840406418,
112
+ -1.2717811787128446
113
+ ]
114
+ },
115
+ "gripper": {
116
+ "max": [
117
+ 1.1246825456619263
118
+ ],
119
+ "min": [
120
+ -0.026084041222929955
121
+ ],
122
+ "mean": [
123
+ 0.22775891423225403
124
+ ],
125
+ "std": [
126
+ 0.2665541470050812
127
+ ],
128
+ "q01": [
129
+ -0.010740487836301327
130
+ ],
131
+ "q99": [
132
+ 0.9495204681158068
133
+ ]
134
+ }
135
+ }
136
+ },
137
+ "modalities": {
138
+ "video": {
139
+ "gripper_cam": {
140
+ "resolution": [
141
+ 1920,
142
+ 1080
143
+ ],
144
+ "channels": 3,
145
+ "fps": 60.0
146
+ },
147
+ "front_cam": {
148
+ "resolution": [
149
+ 1920,
150
+ 1080
151
+ ],
152
+ "channels": 3,
153
+ "fps": 60.0
154
+ }
155
+ },
156
+ "state": {
157
+ "single_arm": {
158
+ "absolute": true,
159
+ "rotation_type": null,
160
+ "shape": [
161
+ 5
162
+ ],
163
+ "continuous": true
164
+ },
165
+ "gripper": {
166
+ "absolute": true,
167
+ "rotation_type": null,
168
+ "shape": [
169
+ 1
170
+ ],
171
+ "continuous": true
172
+ }
173
+ },
174
+ "action": {
175
+ "single_arm": {
176
+ "absolute": true,
177
+ "rotation_type": null,
178
+ "shape": [
179
+ 5
180
+ ],
181
+ "continuous": true
182
+ },
183
+ "gripper": {
184
+ "absolute": true,
185
+ "rotation_type": null,
186
+ "shape": [
187
+ 1
188
+ ],
189
+ "continuous": true
190
+ }
191
+ }
192
+ },
193
+ "embodiment_tag": "new_embodiment"
194
+ }
195
+ }
checkpoint-10000/model-00001-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f06e363badeb2542664108c5ecba102df705e3ce75b791736e5db013a9afb23
3
+ size 4999367032
checkpoint-10000/model-00002-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3cfd89c37720b9e1ab09bac2f16634a8b4b6e9786f5f6d39e16d222a0945bc4d
3
+ size 2586705312
checkpoint-10000/model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-10000/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83d94ac1b893cca7d34628f9dca1b917e6a2905f1188673638692392a89f8344
3
+ size 14512
checkpoint-10000/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50d76c0148abe86593d3589fb068fec9dd433d15f86bc78e5e651cffb1051178
3
+ size 14512
checkpoint-10000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07356cb668b8efc440ddb2f7f81f7da3152060e81d0229922fa3b30df3313f82
3
+ size 1064
checkpoint-10000/trainer_state.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-6000/config.json ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "action_dim": 32,
3
+ "action_head_cfg": {
4
+ "action_dim": 32,
5
+ "action_horizon": 16,
6
+ "add_pos_embed": true,
7
+ "backbone_embedding_dim": 2048,
8
+ "diffusion_model_cfg": {
9
+ "attention_head_dim": 48,
10
+ "cross_attention_dim": 2048,
11
+ "dropout": 0.2,
12
+ "final_dropout": true,
13
+ "interleave_self_attention": true,
14
+ "norm_type": "ada_norm",
15
+ "num_attention_heads": 32,
16
+ "num_layers": 16,
17
+ "output_dim": 1024,
18
+ "positional_embeddings": null
19
+ },
20
+ "hidden_size": 1024,
21
+ "input_embedding_dim": 1536,
22
+ "max_action_dim": 32,
23
+ "max_state_dim": 64,
24
+ "model_dtype": "float32",
25
+ "noise_beta_alpha": 1.5,
26
+ "noise_beta_beta": 1.0,
27
+ "noise_s": 0.999,
28
+ "num_inference_timesteps": 4,
29
+ "num_target_vision_tokens": 32,
30
+ "num_timestep_buckets": 1000,
31
+ "tune_diffusion_model": true,
32
+ "tune_projector": true,
33
+ "use_vlln": true,
34
+ "vl_self_attention_cfg": {
35
+ "attention_head_dim": 64,
36
+ "dropout": 0.2,
37
+ "final_dropout": true,
38
+ "num_attention_heads": 32,
39
+ "num_layers": 4,
40
+ "positional_embeddings": null
41
+ }
42
+ },
43
+ "action_horizon": 16,
44
+ "architectures": [
45
+ "GR00T_N1_5"
46
+ ],
47
+ "attn_implementation": null,
48
+ "backbone_cfg": {
49
+ "eagle_path": "NVEagle/eagle_er-qwen3_1_7B-Siglip2_400M_stage1_5_128gpu_er_v7_1mlp_nops",
50
+ "load_bf16": false,
51
+ "project_to_dim": null,
52
+ "reproject_vision": false,
53
+ "select_layer": 12,
54
+ "tune_llm": false,
55
+ "tune_visual": true,
56
+ "use_flash_attention": true
57
+ },
58
+ "compute_dtype": "bfloat16",
59
+ "hidden_size": 2048,
60
+ "model_dtype": "float32",
61
+ "model_type": "gr00t_n1_5",
62
+ "torch_dtype": "bfloat16",
63
+ "transformers_version": "4.51.3"
64
+ }
checkpoint-6000/experiment_cfg/metadata.json ADDED
@@ -0,0 +1,195 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "new_embodiment": {
3
+ "statistics": {
4
+ "state": {
5
+ "single_arm": {
6
+ "max": [
7
+ 0.7119408845901489,
8
+ 0.6336884498596191,
9
+ 1.6356228590011597,
10
+ 1.612607479095459,
11
+ 0.05677114799618721
12
+ ],
13
+ "min": [
14
+ -0.6996660828590393,
15
+ -1.807470679283142,
16
+ -0.421947717666626,
17
+ -1.0464303493499756,
18
+ -2.122013568878174
19
+ ],
20
+ "mean": [
21
+ 0.055539075285196304,
22
+ -0.846017599105835,
23
+ 1.0085504055023193,
24
+ 0.7250792384147644,
25
+ -1.54018235206604
26
+ ],
27
+ "std": [
28
+ 0.342788428068161,
29
+ 0.7538483142852783,
30
+ 0.4811265468597412,
31
+ 0.5023943781852722,
32
+ 0.13101021945476532
33
+ ],
34
+ "q01": [
35
+ -0.6413605809211731,
36
+ -1.778317928314209,
37
+ -0.2680672299861908,
38
+ -0.5278182625770569,
39
+ -1.7804966950416565
40
+ ],
41
+ "q99": [
42
+ 0.5124747157096863,
43
+ 0.55697101354599,
44
+ 1.5308417570590973,
45
+ 1.4034902346134186,
46
+ -1.2842555046081543
47
+ ]
48
+ },
49
+ "gripper": {
50
+ "max": [
51
+ 1.11394202709198
52
+ ],
53
+ "min": [
54
+ 0.0
55
+ ],
56
+ "mean": [
57
+ 0.2416374385356903
58
+ ],
59
+ "std": [
60
+ 0.2475089430809021
61
+ ],
62
+ "q01": [
63
+ 0.006137421354651451
64
+ ],
65
+ "q99": [
66
+ 0.9217026126384735
67
+ ]
68
+ }
69
+ },
70
+ "action": {
71
+ "single_arm": {
72
+ "max": [
73
+ 0.741093635559082,
74
+ 0.5953298807144165,
75
+ 1.6279510259628296,
76
+ 1.6003326177597046,
77
+ 0.06290856748819351
78
+ ],
79
+ "min": [
80
+ -0.741093635559082,
81
+ -1.8780510425567627,
82
+ -0.5554366707801819,
83
+ -1.1032015085220337,
84
+ -2.12508225440979
85
+ ],
86
+ "mean": [
87
+ 0.06122366711497307,
88
+ -0.9115849137306213,
89
+ 0.9640885591506958,
90
+ 0.7144355177879333,
91
+ -1.5393561124801636
92
+ ],
93
+ "std": [
94
+ 0.35077351331710815,
95
+ 0.7666987180709839,
96
+ 0.5521473288536072,
97
+ 0.5145319104194641,
98
+ 0.13183332979679108
99
+ ],
100
+ "q01": [
101
+ -0.6347781735658645,
102
+ -1.874982237815857,
103
+ -0.4076475277543068,
104
+ -0.5703352582454682,
105
+ -1.779852271080017
106
+ ],
107
+ "q99": [
108
+ 0.5339556932449341,
109
+ 0.5104953962564469,
110
+ 1.6049357652664185,
111
+ 1.412895840406418,
112
+ -1.2717811787128446
113
+ ]
114
+ },
115
+ "gripper": {
116
+ "max": [
117
+ 1.1246825456619263
118
+ ],
119
+ "min": [
120
+ -0.026084041222929955
121
+ ],
122
+ "mean": [
123
+ 0.22775891423225403
124
+ ],
125
+ "std": [
126
+ 0.2665541470050812
127
+ ],
128
+ "q01": [
129
+ -0.010740487836301327
130
+ ],
131
+ "q99": [
132
+ 0.9495204681158068
133
+ ]
134
+ }
135
+ }
136
+ },
137
+ "modalities": {
138
+ "video": {
139
+ "gripper_cam": {
140
+ "resolution": [
141
+ 1920,
142
+ 1080
143
+ ],
144
+ "channels": 3,
145
+ "fps": 60.0
146
+ },
147
+ "front_cam": {
148
+ "resolution": [
149
+ 1920,
150
+ 1080
151
+ ],
152
+ "channels": 3,
153
+ "fps": 60.0
154
+ }
155
+ },
156
+ "state": {
157
+ "single_arm": {
158
+ "absolute": true,
159
+ "rotation_type": null,
160
+ "shape": [
161
+ 5
162
+ ],
163
+ "continuous": true
164
+ },
165
+ "gripper": {
166
+ "absolute": true,
167
+ "rotation_type": null,
168
+ "shape": [
169
+ 1
170
+ ],
171
+ "continuous": true
172
+ }
173
+ },
174
+ "action": {
175
+ "single_arm": {
176
+ "absolute": true,
177
+ "rotation_type": null,
178
+ "shape": [
179
+ 5
180
+ ],
181
+ "continuous": true
182
+ },
183
+ "gripper": {
184
+ "absolute": true,
185
+ "rotation_type": null,
186
+ "shape": [
187
+ 1
188
+ ],
189
+ "continuous": true
190
+ }
191
+ }
192
+ },
193
+ "embodiment_tag": "new_embodiment"
194
+ }
195
+ }
checkpoint-6000/model-00001-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a3992e757ce89d1f1be6f6cf2652032f8579fb395f76f6489049a8704921198
3
+ size 4999367032
checkpoint-6000/model-00002-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:154ace247f532fbb2e918176a82b35f9eb80d7ff0bf96ab415ea3f5f18f7190b
3
+ size 2586705312
checkpoint-6000/model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-6000/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce1576ee07b943cd05fccc371a4563b6eb81f660830a847e634d4de3dfcf3989
3
+ size 14512
checkpoint-6000/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:141dd14cc766a4d1d2e6bedf818a89c7a5f6cb252b747e23aab2aefa28833e16
3
+ size 14512
checkpoint-6000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d6c0bf646ac78d188a2258804081d2e369772e8091e5395d9073771ec310aca
3
+ size 1064
checkpoint-6000/trainer_state.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-7000/config.json ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "action_dim": 32,
3
+ "action_head_cfg": {
4
+ "action_dim": 32,
5
+ "action_horizon": 16,
6
+ "add_pos_embed": true,
7
+ "backbone_embedding_dim": 2048,
8
+ "diffusion_model_cfg": {
9
+ "attention_head_dim": 48,
10
+ "cross_attention_dim": 2048,
11
+ "dropout": 0.2,
12
+ "final_dropout": true,
13
+ "interleave_self_attention": true,
14
+ "norm_type": "ada_norm",
15
+ "num_attention_heads": 32,
16
+ "num_layers": 16,
17
+ "output_dim": 1024,
18
+ "positional_embeddings": null
19
+ },
20
+ "hidden_size": 1024,
21
+ "input_embedding_dim": 1536,
22
+ "max_action_dim": 32,
23
+ "max_state_dim": 64,
24
+ "model_dtype": "float32",
25
+ "noise_beta_alpha": 1.5,
26
+ "noise_beta_beta": 1.0,
27
+ "noise_s": 0.999,
28
+ "num_inference_timesteps": 4,
29
+ "num_target_vision_tokens": 32,
30
+ "num_timestep_buckets": 1000,
31
+ "tune_diffusion_model": true,
32
+ "tune_projector": true,
33
+ "use_vlln": true,
34
+ "vl_self_attention_cfg": {
35
+ "attention_head_dim": 64,
36
+ "dropout": 0.2,
37
+ "final_dropout": true,
38
+ "num_attention_heads": 32,
39
+ "num_layers": 4,
40
+ "positional_embeddings": null
41
+ }
42
+ },
43
+ "action_horizon": 16,
44
+ "architectures": [
45
+ "GR00T_N1_5"
46
+ ],
47
+ "attn_implementation": null,
48
+ "backbone_cfg": {
49
+ "eagle_path": "NVEagle/eagle_er-qwen3_1_7B-Siglip2_400M_stage1_5_128gpu_er_v7_1mlp_nops",
50
+ "load_bf16": false,
51
+ "project_to_dim": null,
52
+ "reproject_vision": false,
53
+ "select_layer": 12,
54
+ "tune_llm": false,
55
+ "tune_visual": true,
56
+ "use_flash_attention": true
57
+ },
58
+ "compute_dtype": "bfloat16",
59
+ "hidden_size": 2048,
60
+ "model_dtype": "float32",
61
+ "model_type": "gr00t_n1_5",
62
+ "torch_dtype": "bfloat16",
63
+ "transformers_version": "4.51.3"
64
+ }
checkpoint-7000/experiment_cfg/metadata.json ADDED
@@ -0,0 +1,195 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "new_embodiment": {
3
+ "statistics": {
4
+ "state": {
5
+ "single_arm": {
6
+ "max": [
7
+ 0.7119408845901489,
8
+ 0.6336884498596191,
9
+ 1.6356228590011597,
10
+ 1.612607479095459,
11
+ 0.05677114799618721
12
+ ],
13
+ "min": [
14
+ -0.6996660828590393,
15
+ -1.807470679283142,
16
+ -0.421947717666626,
17
+ -1.0464303493499756,
18
+ -2.122013568878174
19
+ ],
20
+ "mean": [
21
+ 0.055539075285196304,
22
+ -0.846017599105835,
23
+ 1.0085504055023193,
24
+ 0.7250792384147644,
25
+ -1.54018235206604
26
+ ],
27
+ "std": [
28
+ 0.342788428068161,
29
+ 0.7538483142852783,
30
+ 0.4811265468597412,
31
+ 0.5023943781852722,
32
+ 0.13101021945476532
33
+ ],
34
+ "q01": [
35
+ -0.6413605809211731,
36
+ -1.778317928314209,
37
+ -0.2680672299861908,
38
+ -0.5278182625770569,
39
+ -1.7804966950416565
40
+ ],
41
+ "q99": [
42
+ 0.5124747157096863,
43
+ 0.55697101354599,
44
+ 1.5308417570590973,
45
+ 1.4034902346134186,
46
+ -1.2842555046081543
47
+ ]
48
+ },
49
+ "gripper": {
50
+ "max": [
51
+ 1.11394202709198
52
+ ],
53
+ "min": [
54
+ 0.0
55
+ ],
56
+ "mean": [
57
+ 0.2416374385356903
58
+ ],
59
+ "std": [
60
+ 0.2475089430809021
61
+ ],
62
+ "q01": [
63
+ 0.006137421354651451
64
+ ],
65
+ "q99": [
66
+ 0.9217026126384735
67
+ ]
68
+ }
69
+ },
70
+ "action": {
71
+ "single_arm": {
72
+ "max": [
73
+ 0.741093635559082,
74
+ 0.5953298807144165,
75
+ 1.6279510259628296,
76
+ 1.6003326177597046,
77
+ 0.06290856748819351
78
+ ],
79
+ "min": [
80
+ -0.741093635559082,
81
+ -1.8780510425567627,
82
+ -0.5554366707801819,
83
+ -1.1032015085220337,
84
+ -2.12508225440979
85
+ ],
86
+ "mean": [
87
+ 0.06122366711497307,
88
+ -0.9115849137306213,
89
+ 0.9640885591506958,
90
+ 0.7144355177879333,
91
+ -1.5393561124801636
92
+ ],
93
+ "std": [
94
+ 0.35077351331710815,
95
+ 0.7666987180709839,
96
+ 0.5521473288536072,
97
+ 0.5145319104194641,
98
+ 0.13183332979679108
99
+ ],
100
+ "q01": [
101
+ -0.6347781735658645,
102
+ -1.874982237815857,
103
+ -0.4076475277543068,
104
+ -0.5703352582454682,
105
+ -1.779852271080017
106
+ ],
107
+ "q99": [
108
+ 0.5339556932449341,
109
+ 0.5104953962564469,
110
+ 1.6049357652664185,
111
+ 1.412895840406418,
112
+ -1.2717811787128446
113
+ ]
114
+ },
115
+ "gripper": {
116
+ "max": [
117
+ 1.1246825456619263
118
+ ],
119
+ "min": [
120
+ -0.026084041222929955
121
+ ],
122
+ "mean": [
123
+ 0.22775891423225403
124
+ ],
125
+ "std": [
126
+ 0.2665541470050812
127
+ ],
128
+ "q01": [
129
+ -0.010740487836301327
130
+ ],
131
+ "q99": [
132
+ 0.9495204681158068
133
+ ]
134
+ }
135
+ }
136
+ },
137
+ "modalities": {
138
+ "video": {
139
+ "gripper_cam": {
140
+ "resolution": [
141
+ 1920,
142
+ 1080
143
+ ],
144
+ "channels": 3,
145
+ "fps": 60.0
146
+ },
147
+ "front_cam": {
148
+ "resolution": [
149
+ 1920,
150
+ 1080
151
+ ],
152
+ "channels": 3,
153
+ "fps": 60.0
154
+ }
155
+ },
156
+ "state": {
157
+ "single_arm": {
158
+ "absolute": true,
159
+ "rotation_type": null,
160
+ "shape": [
161
+ 5
162
+ ],
163
+ "continuous": true
164
+ },
165
+ "gripper": {
166
+ "absolute": true,
167
+ "rotation_type": null,
168
+ "shape": [
169
+ 1
170
+ ],
171
+ "continuous": true
172
+ }
173
+ },
174
+ "action": {
175
+ "single_arm": {
176
+ "absolute": true,
177
+ "rotation_type": null,
178
+ "shape": [
179
+ 5
180
+ ],
181
+ "continuous": true
182
+ },
183
+ "gripper": {
184
+ "absolute": true,
185
+ "rotation_type": null,
186
+ "shape": [
187
+ 1
188
+ ],
189
+ "continuous": true
190
+ }
191
+ }
192
+ },
193
+ "embodiment_tag": "new_embodiment"
194
+ }
195
+ }
checkpoint-7000/model-00002-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24bc3cf35587997081c634a1e9b2a8430f35c19199913d22fb7d224bee8496e7
3
+ size 2586705312
checkpoint-7000/model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-7000/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7cbf32575974dd21faaeeca53c09838b4ae0ffa9d782d4036340287156bdb86b
3
+ size 14512
checkpoint-7000/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:42fbf7bacc5945effbee20c68b895e21250b2ce7abae15965c0225b34f571bf7
3
+ size 14512
checkpoint-7000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45049460c0fa55fae34b1884c511d0c5cb5241d0d636fad92c82b0e0a657580e
3
+ size 1064
checkpoint-7000/trainer_state.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-8000/config.json ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "action_dim": 32,
3
+ "action_head_cfg": {
4
+ "action_dim": 32,
5
+ "action_horizon": 16,
6
+ "add_pos_embed": true,
7
+ "backbone_embedding_dim": 2048,
8
+ "diffusion_model_cfg": {
9
+ "attention_head_dim": 48,
10
+ "cross_attention_dim": 2048,
11
+ "dropout": 0.2,
12
+ "final_dropout": true,
13
+ "interleave_self_attention": true,
14
+ "norm_type": "ada_norm",
15
+ "num_attention_heads": 32,
16
+ "num_layers": 16,
17
+ "output_dim": 1024,
18
+ "positional_embeddings": null
19
+ },
20
+ "hidden_size": 1024,
21
+ "input_embedding_dim": 1536,
22
+ "max_action_dim": 32,
23
+ "max_state_dim": 64,
24
+ "model_dtype": "float32",
25
+ "noise_beta_alpha": 1.5,
26
+ "noise_beta_beta": 1.0,
27
+ "noise_s": 0.999,
28
+ "num_inference_timesteps": 4,
29
+ "num_target_vision_tokens": 32,
30
+ "num_timestep_buckets": 1000,
31
+ "tune_diffusion_model": true,
32
+ "tune_projector": true,
33
+ "use_vlln": true,
34
+ "vl_self_attention_cfg": {
35
+ "attention_head_dim": 64,
36
+ "dropout": 0.2,
37
+ "final_dropout": true,
38
+ "num_attention_heads": 32,
39
+ "num_layers": 4,
40
+ "positional_embeddings": null
41
+ }
42
+ },
43
+ "action_horizon": 16,
44
+ "architectures": [
45
+ "GR00T_N1_5"
46
+ ],
47
+ "attn_implementation": null,
48
+ "backbone_cfg": {
49
+ "eagle_path": "NVEagle/eagle_er-qwen3_1_7B-Siglip2_400M_stage1_5_128gpu_er_v7_1mlp_nops",
50
+ "load_bf16": false,
51
+ "project_to_dim": null,
52
+ "reproject_vision": false,
53
+ "select_layer": 12,
54
+ "tune_llm": false,
55
+ "tune_visual": true,
56
+ "use_flash_attention": true
57
+ },
58
+ "compute_dtype": "bfloat16",
59
+ "hidden_size": 2048,
60
+ "model_dtype": "float32",
61
+ "model_type": "gr00t_n1_5",
62
+ "torch_dtype": "bfloat16",
63
+ "transformers_version": "4.51.3"
64
+ }
checkpoint-8000/experiment_cfg/metadata.json ADDED
@@ -0,0 +1,195 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "new_embodiment": {
3
+ "statistics": {
4
+ "state": {
5
+ "single_arm": {
6
+ "max": [
7
+ 0.7119408845901489,
8
+ 0.6336884498596191,
9
+ 1.6356228590011597,
10
+ 1.612607479095459,
11
+ 0.05677114799618721
12
+ ],
13
+ "min": [
14
+ -0.6996660828590393,
15
+ -1.807470679283142,
16
+ -0.421947717666626,
17
+ -1.0464303493499756,
18
+ -2.122013568878174
19
+ ],
20
+ "mean": [
21
+ 0.055539075285196304,
22
+ -0.846017599105835,
23
+ 1.0085504055023193,
24
+ 0.7250792384147644,
25
+ -1.54018235206604
26
+ ],
27
+ "std": [
28
+ 0.342788428068161,
29
+ 0.7538483142852783,
30
+ 0.4811265468597412,
31
+ 0.5023943781852722,
32
+ 0.13101021945476532
33
+ ],
34
+ "q01": [
35
+ -0.6413605809211731,
36
+ -1.778317928314209,
37
+ -0.2680672299861908,
38
+ -0.5278182625770569,
39
+ -1.7804966950416565
40
+ ],
41
+ "q99": [
42
+ 0.5124747157096863,
43
+ 0.55697101354599,
44
+ 1.5308417570590973,
45
+ 1.4034902346134186,
46
+ -1.2842555046081543
47
+ ]
48
+ },
49
+ "gripper": {
50
+ "max": [
51
+ 1.11394202709198
52
+ ],
53
+ "min": [
54
+ 0.0
55
+ ],
56
+ "mean": [
57
+ 0.2416374385356903
58
+ ],
59
+ "std": [
60
+ 0.2475089430809021
61
+ ],
62
+ "q01": [
63
+ 0.006137421354651451
64
+ ],
65
+ "q99": [
66
+ 0.9217026126384735
67
+ ]
68
+ }
69
+ },
70
+ "action": {
71
+ "single_arm": {
72
+ "max": [
73
+ 0.741093635559082,
74
+ 0.5953298807144165,
75
+ 1.6279510259628296,
76
+ 1.6003326177597046,
77
+ 0.06290856748819351
78
+ ],
79
+ "min": [
80
+ -0.741093635559082,
81
+ -1.8780510425567627,
82
+ -0.5554366707801819,
83
+ -1.1032015085220337,
84
+ -2.12508225440979
85
+ ],
86
+ "mean": [
87
+ 0.06122366711497307,
88
+ -0.9115849137306213,
89
+ 0.9640885591506958,
90
+ 0.7144355177879333,
91
+ -1.5393561124801636
92
+ ],
93
+ "std": [
94
+ 0.35077351331710815,
95
+ 0.7666987180709839,
96
+ 0.5521473288536072,
97
+ 0.5145319104194641,
98
+ 0.13183332979679108
99
+ ],
100
+ "q01": [
101
+ -0.6347781735658645,
102
+ -1.874982237815857,
103
+ -0.4076475277543068,
104
+ -0.5703352582454682,
105
+ -1.779852271080017
106
+ ],
107
+ "q99": [
108
+ 0.5339556932449341,
109
+ 0.5104953962564469,
110
+ 1.6049357652664185,
111
+ 1.412895840406418,
112
+ -1.2717811787128446
113
+ ]
114
+ },
115
+ "gripper": {
116
+ "max": [
117
+ 1.1246825456619263
118
+ ],
119
+ "min": [
120
+ -0.026084041222929955
121
+ ],
122
+ "mean": [
123
+ 0.22775891423225403
124
+ ],
125
+ "std": [
126
+ 0.2665541470050812
127
+ ],
128
+ "q01": [
129
+ -0.010740487836301327
130
+ ],
131
+ "q99": [
132
+ 0.9495204681158068
133
+ ]
134
+ }
135
+ }
136
+ },
137
+ "modalities": {
138
+ "video": {
139
+ "gripper_cam": {
140
+ "resolution": [
141
+ 1920,
142
+ 1080
143
+ ],
144
+ "channels": 3,
145
+ "fps": 60.0
146
+ },
147
+ "front_cam": {
148
+ "resolution": [
149
+ 1920,
150
+ 1080
151
+ ],
152
+ "channels": 3,
153
+ "fps": 60.0
154
+ }
155
+ },
156
+ "state": {
157
+ "single_arm": {
158
+ "absolute": true,
159
+ "rotation_type": null,
160
+ "shape": [
161
+ 5
162
+ ],
163
+ "continuous": true
164
+ },
165
+ "gripper": {
166
+ "absolute": true,
167
+ "rotation_type": null,
168
+ "shape": [
169
+ 1
170
+ ],
171
+ "continuous": true
172
+ }
173
+ },
174
+ "action": {
175
+ "single_arm": {
176
+ "absolute": true,
177
+ "rotation_type": null,
178
+ "shape": [
179
+ 5
180
+ ],
181
+ "continuous": true
182
+ },
183
+ "gripper": {
184
+ "absolute": true,
185
+ "rotation_type": null,
186
+ "shape": [
187
+ 1
188
+ ],
189
+ "continuous": true
190
+ }
191
+ }
192
+ },
193
+ "embodiment_tag": "new_embodiment"
194
+ }
195
+ }
checkpoint-8000/model-00001-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e54790eb5133235e6802df06052ae8d1a6fc0c5f6cdfc6ec71b3137530ffa9e
3
+ size 4999367032
checkpoint-8000/model-00002-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49ef7d13dfd6761e5874215360409a676c7a472ef6c6de587a58c0f17820977e
3
+ size 2586705312
checkpoint-8000/model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-8000/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02cdc1caa88925c4f843fc663cd7113cbf7aa63c75a1cd4ac5b5a58448585d59
3
+ size 14512
checkpoint-8000/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84a43e3f2fe28ceb41f8072461ed7ff9bed54e8e03b22a2ac98bd981f4d3705c
3
+ size 14512
checkpoint-8000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00739e77892b3b90e07da5aa63424b8c4adff832abcfc91f97c4f8838af650db
3
+ size 1064
checkpoint-8000/trainer_state.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-9000/config.json ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "action_dim": 32,
3
+ "action_head_cfg": {
4
+ "action_dim": 32,
5
+ "action_horizon": 16,
6
+ "add_pos_embed": true,
7
+ "backbone_embedding_dim": 2048,
8
+ "diffusion_model_cfg": {
9
+ "attention_head_dim": 48,
10
+ "cross_attention_dim": 2048,
11
+ "dropout": 0.2,
12
+ "final_dropout": true,
13
+ "interleave_self_attention": true,
14
+ "norm_type": "ada_norm",
15
+ "num_attention_heads": 32,
16
+ "num_layers": 16,
17
+ "output_dim": 1024,
18
+ "positional_embeddings": null
19
+ },
20
+ "hidden_size": 1024,
21
+ "input_embedding_dim": 1536,
22
+ "max_action_dim": 32,
23
+ "max_state_dim": 64,
24
+ "model_dtype": "float32",
25
+ "noise_beta_alpha": 1.5,
26
+ "noise_beta_beta": 1.0,
27
+ "noise_s": 0.999,
28
+ "num_inference_timesteps": 4,
29
+ "num_target_vision_tokens": 32,
30
+ "num_timestep_buckets": 1000,
31
+ "tune_diffusion_model": true,
32
+ "tune_projector": true,
33
+ "use_vlln": true,
34
+ "vl_self_attention_cfg": {
35
+ "attention_head_dim": 64,
36
+ "dropout": 0.2,
37
+ "final_dropout": true,
38
+ "num_attention_heads": 32,
39
+ "num_layers": 4,
40
+ "positional_embeddings": null
41
+ }
42
+ },
43
+ "action_horizon": 16,
44
+ "architectures": [
45
+ "GR00T_N1_5"
46
+ ],
47
+ "attn_implementation": null,
48
+ "backbone_cfg": {
49
+ "eagle_path": "NVEagle/eagle_er-qwen3_1_7B-Siglip2_400M_stage1_5_128gpu_er_v7_1mlp_nops",
50
+ "load_bf16": false,
51
+ "project_to_dim": null,
52
+ "reproject_vision": false,
53
+ "select_layer": 12,
54
+ "tune_llm": false,
55
+ "tune_visual": true,
56
+ "use_flash_attention": true
57
+ },
58
+ "compute_dtype": "bfloat16",
59
+ "hidden_size": 2048,
60
+ "model_dtype": "float32",
61
+ "model_type": "gr00t_n1_5",
62
+ "torch_dtype": "bfloat16",
63
+ "transformers_version": "4.51.3"
64
+ }
checkpoint-9000/experiment_cfg/metadata.json ADDED
@@ -0,0 +1,195 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "new_embodiment": {
3
+ "statistics": {
4
+ "state": {
5
+ "single_arm": {
6
+ "max": [
7
+ 0.7119408845901489,
8
+ 0.6336884498596191,
9
+ 1.6356228590011597,
10
+ 1.612607479095459,
11
+ 0.05677114799618721
12
+ ],
13
+ "min": [
14
+ -0.6996660828590393,
15
+ -1.807470679283142,
16
+ -0.421947717666626,
17
+ -1.0464303493499756,
18
+ -2.122013568878174
19
+ ],
20
+ "mean": [
21
+ 0.055539075285196304,
22
+ -0.846017599105835,
23
+ 1.0085504055023193,
24
+ 0.7250792384147644,
25
+ -1.54018235206604
26
+ ],
27
+ "std": [
28
+ 0.342788428068161,
29
+ 0.7538483142852783,
30
+ 0.4811265468597412,
31
+ 0.5023943781852722,
32
+ 0.13101021945476532
33
+ ],
34
+ "q01": [
35
+ -0.6413605809211731,
36
+ -1.778317928314209,
37
+ -0.2680672299861908,
38
+ -0.5278182625770569,
39
+ -1.7804966950416565
40
+ ],
41
+ "q99": [
42
+ 0.5124747157096863,
43
+ 0.55697101354599,
44
+ 1.5308417570590973,
45
+ 1.4034902346134186,
46
+ -1.2842555046081543
47
+ ]
48
+ },
49
+ "gripper": {
50
+ "max": [
51
+ 1.11394202709198
52
+ ],
53
+ "min": [
54
+ 0.0
55
+ ],
56
+ "mean": [
57
+ 0.2416374385356903
58
+ ],
59
+ "std": [
60
+ 0.2475089430809021
61
+ ],
62
+ "q01": [
63
+ 0.006137421354651451
64
+ ],
65
+ "q99": [
66
+ 0.9217026126384735
67
+ ]
68
+ }
69
+ },
70
+ "action": {
71
+ "single_arm": {
72
+ "max": [
73
+ 0.741093635559082,
74
+ 0.5953298807144165,
75
+ 1.6279510259628296,
76
+ 1.6003326177597046,
77
+ 0.06290856748819351
78
+ ],
79
+ "min": [
80
+ -0.741093635559082,
81
+ -1.8780510425567627,
82
+ -0.5554366707801819,
83
+ -1.1032015085220337,
84
+ -2.12508225440979
85
+ ],
86
+ "mean": [
87
+ 0.06122366711497307,
88
+ -0.9115849137306213,
89
+ 0.9640885591506958,
90
+ 0.7144355177879333,
91
+ -1.5393561124801636
92
+ ],
93
+ "std": [
94
+ 0.35077351331710815,
95
+ 0.7666987180709839,
96
+ 0.5521473288536072,
97
+ 0.5145319104194641,
98
+ 0.13183332979679108
99
+ ],
100
+ "q01": [
101
+ -0.6347781735658645,
102
+ -1.874982237815857,
103
+ -0.4076475277543068,
104
+ -0.5703352582454682,
105
+ -1.779852271080017
106
+ ],
107
+ "q99": [
108
+ 0.5339556932449341,
109
+ 0.5104953962564469,
110
+ 1.6049357652664185,
111
+ 1.412895840406418,
112
+ -1.2717811787128446
113
+ ]
114
+ },
115
+ "gripper": {
116
+ "max": [
117
+ 1.1246825456619263
118
+ ],
119
+ "min": [
120
+ -0.026084041222929955
121
+ ],
122
+ "mean": [
123
+ 0.22775891423225403
124
+ ],
125
+ "std": [
126
+ 0.2665541470050812
127
+ ],
128
+ "q01": [
129
+ -0.010740487836301327
130
+ ],
131
+ "q99": [
132
+ 0.9495204681158068
133
+ ]
134
+ }
135
+ }
136
+ },
137
+ "modalities": {
138
+ "video": {
139
+ "gripper_cam": {
140
+ "resolution": [
141
+ 1920,
142
+ 1080
143
+ ],
144
+ "channels": 3,
145
+ "fps": 60.0
146
+ },
147
+ "front_cam": {
148
+ "resolution": [
149
+ 1920,
150
+ 1080
151
+ ],
152
+ "channels": 3,
153
+ "fps": 60.0
154
+ }
155
+ },
156
+ "state": {
157
+ "single_arm": {
158
+ "absolute": true,
159
+ "rotation_type": null,
160
+ "shape": [
161
+ 5
162
+ ],
163
+ "continuous": true
164
+ },
165
+ "gripper": {
166
+ "absolute": true,
167
+ "rotation_type": null,
168
+ "shape": [
169
+ 1
170
+ ],
171
+ "continuous": true
172
+ }
173
+ },
174
+ "action": {
175
+ "single_arm": {
176
+ "absolute": true,
177
+ "rotation_type": null,
178
+ "shape": [
179
+ 5
180
+ ],
181
+ "continuous": true
182
+ },
183
+ "gripper": {
184
+ "absolute": true,
185
+ "rotation_type": null,
186
+ "shape": [
187
+ 1
188
+ ],
189
+ "continuous": true
190
+ }
191
+ }
192
+ },
193
+ "embodiment_tag": "new_embodiment"
194
+ }
195
+ }
checkpoint-9000/model-00002-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4a5b9398f52cb2707cf50b5c58baf0c74de9982df8bf7b547deb4d22f2dc3d2
3
+ size 2586705312
checkpoint-9000/model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-9000/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ff3cf2ed85d85ae7005dd6eabb0ebd9f9dd3f24983969131419b1cbafc46dc3
3
+ size 14512
checkpoint-9000/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:282b9525f2b46aef1bf8b490b28bd546aa324badcd408509da29c81f05da2dd4
3
+ size 14512
checkpoint-9000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c26823aa5685497db4fe82a648513d12cb6c26d9be110887b197c10fe3a44ac
3
+ size 1064
checkpoint-9000/trainer_state.json ADDED
The diff for this file is too large to render. See raw diff
 
config.json ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "action_dim": 32,
3
+ "action_head_cfg": {
4
+ "action_dim": 32,
5
+ "action_horizon": 16,
6
+ "add_pos_embed": true,
7
+ "backbone_embedding_dim": 2048,
8
+ "diffusion_model_cfg": {
9
+ "attention_head_dim": 48,
10
+ "cross_attention_dim": 2048,
11
+ "dropout": 0.2,
12
+ "final_dropout": true,
13
+ "interleave_self_attention": true,
14
+ "norm_type": "ada_norm",
15
+ "num_attention_heads": 32,
16
+ "num_layers": 16,
17
+ "output_dim": 1024,
18
+ "positional_embeddings": null
19
+ },
20
+ "hidden_size": 1024,
21
+ "input_embedding_dim": 1536,
22
+ "max_action_dim": 32,
23
+ "max_state_dim": 64,
24
+ "model_dtype": "float32",
25
+ "noise_beta_alpha": 1.5,
26
+ "noise_beta_beta": 1.0,
27
+ "noise_s": 0.999,
28
+ "num_inference_timesteps": 4,
29
+ "num_target_vision_tokens": 32,
30
+ "num_timestep_buckets": 1000,
31
+ "tune_diffusion_model": true,
32
+ "tune_projector": true,
33
+ "use_vlln": true,
34
+ "vl_self_attention_cfg": {
35
+ "attention_head_dim": 64,
36
+ "dropout": 0.2,
37
+ "final_dropout": true,
38
+ "num_attention_heads": 32,
39
+ "num_layers": 4,
40
+ "positional_embeddings": null
41
+ }
42
+ },
43
+ "action_horizon": 16,
44
+ "architectures": [
45
+ "GR00T_N1_5"
46
+ ],
47
+ "attn_implementation": null,
48
+ "backbone_cfg": {
49
+ "eagle_path": "NVEagle/eagle_er-qwen3_1_7B-Siglip2_400M_stage1_5_128gpu_er_v7_1mlp_nops",
50
+ "load_bf16": false,
51
+ "project_to_dim": null,
52
+ "reproject_vision": false,
53
+ "select_layer": 12,
54
+ "tune_llm": false,
55
+ "tune_visual": true,
56
+ "use_flash_attention": true
57
+ },
58
+ "compute_dtype": "bfloat16",
59
+ "hidden_size": 2048,
60
+ "model_dtype": "float32",
61
+ "model_type": "gr00t_n1_5",
62
+ "torch_dtype": "bfloat16",
63
+ "transformers_version": "4.51.3"
64
+ }
experiment_cfg/metadata.json ADDED
@@ -0,0 +1,195 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "new_embodiment": {
3
+ "statistics": {
4
+ "state": {
5
+ "single_arm": {
6
+ "max": [
7
+ 0.7119408845901489,
8
+ 0.6336884498596191,
9
+ 1.6356228590011597,
10
+ 1.612607479095459,
11
+ 0.05677114799618721
12
+ ],
13
+ "min": [
14
+ -0.6996660828590393,
15
+ -1.807470679283142,
16
+ -0.421947717666626,
17
+ -1.0464303493499756,
18
+ -2.122013568878174
19
+ ],
20
+ "mean": [
21
+ 0.055539075285196304,
22
+ -0.846017599105835,
23
+ 1.0085504055023193,
24
+ 0.7250792384147644,
25
+ -1.54018235206604
26
+ ],
27
+ "std": [
28
+ 0.342788428068161,
29
+ 0.7538483142852783,
30
+ 0.4811265468597412,
31
+ 0.5023943781852722,
32
+ 0.13101021945476532
33
+ ],
34
+ "q01": [
35
+ -0.6413605809211731,
36
+ -1.778317928314209,
37
+ -0.2680672299861908,
38
+ -0.5278182625770569,
39
+ -1.7804966950416565
40
+ ],
41
+ "q99": [
42
+ 0.5124747157096863,
43
+ 0.55697101354599,
44
+ 1.5308417570590973,
45
+ 1.4034902346134186,
46
+ -1.2842555046081543
47
+ ]
48
+ },
49
+ "gripper": {
50
+ "max": [
51
+ 1.11394202709198
52
+ ],
53
+ "min": [
54
+ 0.0
55
+ ],
56
+ "mean": [
57
+ 0.2416374385356903
58
+ ],
59
+ "std": [
60
+ 0.2475089430809021
61
+ ],
62
+ "q01": [
63
+ 0.006137421354651451
64
+ ],
65
+ "q99": [
66
+ 0.9217026126384735
67
+ ]
68
+ }
69
+ },
70
+ "action": {
71
+ "single_arm": {
72
+ "max": [
73
+ 0.741093635559082,
74
+ 0.5953298807144165,
75
+ 1.6279510259628296,
76
+ 1.6003326177597046,
77
+ 0.06290856748819351
78
+ ],
79
+ "min": [
80
+ -0.741093635559082,
81
+ -1.8780510425567627,
82
+ -0.5554366707801819,
83
+ -1.1032015085220337,
84
+ -2.12508225440979
85
+ ],
86
+ "mean": [
87
+ 0.06122366711497307,
88
+ -0.9115849137306213,
89
+ 0.9640885591506958,
90
+ 0.7144355177879333,
91
+ -1.5393561124801636
92
+ ],
93
+ "std": [
94
+ 0.35077351331710815,
95
+ 0.7666987180709839,
96
+ 0.5521473288536072,
97
+ 0.5145319104194641,
98
+ 0.13183332979679108
99
+ ],
100
+ "q01": [
101
+ -0.6347781735658645,
102
+ -1.874982237815857,
103
+ -0.4076475277543068,
104
+ -0.5703352582454682,
105
+ -1.779852271080017
106
+ ],
107
+ "q99": [
108
+ 0.5339556932449341,
109
+ 0.5104953962564469,
110
+ 1.6049357652664185,
111
+ 1.412895840406418,
112
+ -1.2717811787128446
113
+ ]
114
+ },
115
+ "gripper": {
116
+ "max": [
117
+ 1.1246825456619263
118
+ ],
119
+ "min": [
120
+ -0.026084041222929955
121
+ ],
122
+ "mean": [
123
+ 0.22775891423225403
124
+ ],
125
+ "std": [
126
+ 0.2665541470050812
127
+ ],
128
+ "q01": [
129
+ -0.010740487836301327
130
+ ],
131
+ "q99": [
132
+ 0.9495204681158068
133
+ ]
134
+ }
135
+ }
136
+ },
137
+ "modalities": {
138
+ "video": {
139
+ "gripper_cam": {
140
+ "resolution": [
141
+ 1920,
142
+ 1080
143
+ ],
144
+ "channels": 3,
145
+ "fps": 60.0
146
+ },
147
+ "front_cam": {
148
+ "resolution": [
149
+ 1920,
150
+ 1080
151
+ ],
152
+ "channels": 3,
153
+ "fps": 60.0
154
+ }
155
+ },
156
+ "state": {
157
+ "single_arm": {
158
+ "absolute": true,
159
+ "rotation_type": null,
160
+ "shape": [
161
+ 5
162
+ ],
163
+ "continuous": true
164
+ },
165
+ "gripper": {
166
+ "absolute": true,
167
+ "rotation_type": null,
168
+ "shape": [
169
+ 1
170
+ ],
171
+ "continuous": true
172
+ }
173
+ },
174
+ "action": {
175
+ "single_arm": {
176
+ "absolute": true,
177
+ "rotation_type": null,
178
+ "shape": [
179
+ 5
180
+ ],
181
+ "continuous": true
182
+ },
183
+ "gripper": {
184
+ "absolute": true,
185
+ "rotation_type": null,
186
+ "shape": [
187
+ 1
188
+ ],
189
+ "continuous": true
190
+ }
191
+ }
192
+ },
193
+ "embodiment_tag": "new_embodiment"
194
+ }
195
+ }
model-00001-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f06e363badeb2542664108c5ecba102df705e3ce75b791736e5db013a9afb23
3
+ size 4999367032
model-00002-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3cfd89c37720b9e1ab09bac2f16634a8b4b6e9786f5f6d39e16d222a0945bc4d
3
+ size 2586705312
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
trainer_state.json ADDED
The diff for this file is too large to render. See raw diff
 
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb0e17bf33284019940c3c1034c5119bf667a95ec3b60688fa01d2912f414557
3
+ size 5368