HuggingFace committed on
Commit
656b6be
·
1 Parent(s): f590f7c

Publication of PushT Diffusion Policy model with demonstration videos - 2025-04-28

Browse files
Files changed (4) hide show
  1. README.md +26 -0
  2. metadata.json +13 -0
  3. model/config.json +76 -0
  4. model/model.safetensors +3 -0
README.md ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # PushT Diffusion Policy - Robot Control Model
3
+
4
+ This model is an implementation of Diffusion Policy for the PushT environment, which simulates robotic pushing tasks.
5
+
6
+ ## Model
7
+
8
+ This model uses a conditional diffusion architecture to predict robotic actions based on visual observations (`observation.image`) and a low-dimensional state input (`observation.state`).
9
+
10
+ ## Performance
11
+
12
+ The model achieves a success rate of 40.0% in the PushT environment across different initial configurations.
13
+
14
+ ## Demonstration Videos
15
+
16
+ The repository includes demonstration videos in the `videos/` folder.
17
+
18
+ ## Usage
19
+
20
+ ```python
21
+ from lerobot.common.policies.diffusion.modeling_diffusion import DiffusionPolicy
22
+
23
+ policy = DiffusionPolicy.from_pretrained("RafaelJaime/pusht-diffusion")
24
+ ```
25
+
26
+ Published on 2025-04-28
metadata.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "library_name": "lerobot",
3
+ "task_type": "robot-control",
4
+ "environment": "PushT",
5
+ "timestamp": "2025-04-28T04:50:36.767363",
6
+ "videos": [
7
+ "video_2_experiment_summary.mp4",
8
+ "video_1_pusht_episode.mp4",
9
+ "pusht_episode.mp4",
10
+ "experiment_summary.mp4"
11
+ ],
12
+ "device": "cuda"
13
+ }
model/config.json ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "type": "diffusion",
3
+ "n_obs_steps": 2,
4
+ "normalization_mapping": {
5
+ "ACTION": "MIN_MAX",
6
+ "STATE": "MIN_MAX",
7
+ "VISUAL": "MEAN_STD"
8
+ },
9
+ "input_features": {
10
+ "observation.image": {
11
+ "type": "VISUAL",
12
+ "shape": [
13
+ 3,
14
+ 96,
15
+ 96
16
+ ]
17
+ },
18
+ "observation.state": {
19
+ "type": "STATE",
20
+ "shape": [
21
+ 2
22
+ ]
23
+ }
24
+ },
25
+ "output_features": {
26
+ "action": {
27
+ "type": "ACTION",
28
+ "shape": [
29
+ 2
30
+ ]
31
+ }
32
+ },
33
+ "device": "cuda",
34
+ "use_amp": false,
35
+ "horizon": 16,
36
+ "n_action_steps": 8,
37
+ "drop_n_last_frames": 7,
38
+ "vision_backbone": "resnet18",
39
+ "crop_shape": [
40
+ 84,
41
+ 84
42
+ ],
43
+ "crop_is_random": true,
44
+ "pretrained_backbone_weights": null,
45
+ "use_group_norm": true,
46
+ "spatial_softmax_num_keypoints": 32,
47
+ "use_separate_rgb_encoder_per_camera": false,
48
+ "down_dims": [
49
+ 512,
50
+ 1024,
51
+ 2048
52
+ ],
53
+ "kernel_size": 5,
54
+ "n_groups": 8,
55
+ "diffusion_step_embed_dim": 128,
56
+ "use_film_scale_modulation": true,
57
+ "noise_scheduler_type": "DDPM",
58
+ "num_train_timesteps": 100,
59
+ "beta_schedule": "squaredcos_cap_v2",
60
+ "beta_start": 0.0001,
61
+ "beta_end": 0.02,
62
+ "prediction_type": "epsilon",
63
+ "clip_sample": true,
64
+ "clip_sample_range": 1.0,
65
+ "num_inference_steps": null,
66
+ "do_mask_loss_for_padding": false,
67
+ "optimizer_lr": 0.0001,
68
+ "optimizer_betas": [
69
+ 0.95,
70
+ 0.999
71
+ ],
72
+ "optimizer_eps": 1e-08,
73
+ "optimizer_weight_decay": 1e-06,
74
+ "scheduler_name": "cosine",
75
+ "scheduler_warmup_steps": 500
76
+ }
model/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:995d14d35db57d95c35ad9704c3d79c8612b7bc45f3877e5c46c2cdc516856a8
3
+ size 1050862408