| { | |
| "metadata": { | |
| "format": "torch", | |
| "version": "1.0", | |
| "model": "micro-distill-grpo-vae", | |
| "hidden_size": 512, | |
| "num_layers": 8, | |
| "num_heads": 8, | |
| "vocab_size": 50257, | |
| "training_steps": 100 | |
| }, | |
| "tensors": { | |
| "transformer.wte.weight": { | |
| "shape": [ | |
| 50257, | |
| 512 | |
| ], | |
| "dtype": "float32", | |
| "size": "98.2 MB" | |
| }, | |
| "transformer.wpe.weight": { | |
| "shape": [ | |
| 1024, | |
| 512 | |
| ], | |
| "dtype": "float32", | |
| "size": "2.0 MB" | |
| }, | |
| "transformer.h.0.ln_1.weight": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "float32", | |
| "size": "2.0 KB" | |
| }, | |
| "transformer.h.0.attn.c_attn.weight": { | |
| "shape": [ | |
| 512, | |
| 1536 | |
| ], | |
| "dtype": "float32", | |
| "size": "3.0 MB" | |
| }, | |
| "transformer.h.0.mlp.c_fc.weight": { | |
| "shape": [ | |
| 512, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "size": "4.0 MB" | |
| }, | |
| "transformer.h.1.ln_1.weight": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "float32", | |
| "size": "2.0 KB" | |
| }, | |
| "transformer.h.1.attn.c_attn.weight": { | |
| "shape": [ | |
| 512, | |
| 1536 | |
| ], | |
| "dtype": "float32", | |
| "size": "3.0 MB" | |
| }, | |
| "transformer.h.1.mlp.c_fc.weight": { | |
| "shape": [ | |
| 512, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "size": "4.0 MB" | |
| }, | |
| "transformer.h.2.ln_1.weight": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "float32", | |
| "size": "2.0 KB" | |
| }, | |
| "transformer.h.2.attn.c_attn.weight": { | |
| "shape": [ | |
| 512, | |
| 1536 | |
| ], | |
| "dtype": "float32", | |
| "size": "3.0 MB" | |
| }, | |
| "transformer.h.2.mlp.c_fc.weight": { | |
| "shape": [ | |
| 512, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "size": "4.0 MB" | |
| }, | |
| "transformer.h.3.ln_1.weight": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "float32", | |
| "size": "2.0 KB" | |
| }, | |
| "transformer.h.3.attn.c_attn.weight": { | |
| "shape": [ | |
| 512, | |
| 1536 | |
| ], | |
| "dtype": "float32", | |
| "size": "3.0 MB" | |
| }, | |
| "transformer.h.3.mlp.c_fc.weight": { | |
| "shape": [ | |
| 512, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "size": "4.0 MB" | |
| }, | |
| "transformer.h.4.ln_1.weight": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "float32", | |
| "size": "2.0 KB" | |
| }, | |
| "transformer.h.4.attn.c_attn.weight": { | |
| "shape": [ | |
| 512, | |
| 1536 | |
| ], | |
| "dtype": "float32", | |
| "size": "3.0 MB" | |
| }, | |
| "transformer.h.4.mlp.c_fc.weight": { | |
| "shape": [ | |
| 512, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "size": "4.0 MB" | |
| }, | |
| "transformer.h.5.ln_1.weight": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "float32", | |
| "size": "2.0 KB" | |
| }, | |
| "transformer.h.5.attn.c_attn.weight": { | |
| "shape": [ | |
| 512, | |
| 1536 | |
| ], | |
| "dtype": "float32", | |
| "size": "3.0 MB" | |
| }, | |
| "transformer.h.5.mlp.c_fc.weight": { | |
| "shape": [ | |
| 512, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "size": "4.0 MB" | |
| }, | |
| "transformer.h.6.ln_1.weight": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "float32", | |
| "size": "2.0 KB" | |
| }, | |
| "transformer.h.6.attn.c_attn.weight": { | |
| "shape": [ | |
| 512, | |
| 1536 | |
| ], | |
| "dtype": "float32", | |
| "size": "3.0 MB" | |
| }, | |
| "transformer.h.6.mlp.c_fc.weight": { | |
| "shape": [ | |
| 512, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "size": "4.0 MB" | |
| }, | |
| "transformer.h.7.ln_1.weight": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "float32", | |
| "size": "2.0 KB" | |
| }, | |
| "transformer.h.7.attn.c_attn.weight": { | |
| "shape": [ | |
| 512, | |
| 1536 | |
| ], | |
| "dtype": "float32", | |
| "size": "3.0 MB" | |
| }, | |
| "transformer.h.7.mlp.c_fc.weight": { | |
| "shape": [ | |
| 512, | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "size": "4.0 MB" | |
| }, | |
| "lm_head.weight": { | |
| "shape": [ | |
| 50257, | |
| 512 | |
| ], | |
| "dtype": "float32", | |
| "size": "98.2 MB" | |
| } | |
| } | |
| } |