microd_v1 / pytorch_model.bin
webxos's picture
Upload 12 files
6253d52 verified
{
"metadata": {
"format": "torch",
"version": "1.0",
"model": "micro-distill-grpo-vae",
"hidden_size": 512,
"num_layers": 8,
"num_heads": 8,
"vocab_size": 50257,
"training_steps": 100
},
"tensors": {
"transformer.wte.weight": {
"shape": [
50257,
512
],
"dtype": "float32",
"size": "98.2 MB"
},
"transformer.wpe.weight": {
"shape": [
1024,
512
],
"dtype": "float32",
"size": "2.0 MB"
},
"transformer.h.0.ln_1.weight": {
"shape": [
512
],
"dtype": "float32",
"size": "2.0 KB"
},
"transformer.h.0.attn.c_attn.weight": {
"shape": [
512,
1536
],
"dtype": "float32",
"size": "3.0 MB"
},
"transformer.h.0.mlp.c_fc.weight": {
"shape": [
512,
2048
],
"dtype": "float32",
"size": "4.0 MB"
},
"transformer.h.1.ln_1.weight": {
"shape": [
512
],
"dtype": "float32",
"size": "2.0 KB"
},
"transformer.h.1.attn.c_attn.weight": {
"shape": [
512,
1536
],
"dtype": "float32",
"size": "3.0 MB"
},
"transformer.h.1.mlp.c_fc.weight": {
"shape": [
512,
2048
],
"dtype": "float32",
"size": "4.0 MB"
},
"transformer.h.2.ln_1.weight": {
"shape": [
512
],
"dtype": "float32",
"size": "2.0 KB"
},
"transformer.h.2.attn.c_attn.weight": {
"shape": [
512,
1536
],
"dtype": "float32",
"size": "3.0 MB"
},
"transformer.h.2.mlp.c_fc.weight": {
"shape": [
512,
2048
],
"dtype": "float32",
"size": "4.0 MB"
},
"transformer.h.3.ln_1.weight": {
"shape": [
512
],
"dtype": "float32",
"size": "2.0 KB"
},
"transformer.h.3.attn.c_attn.weight": {
"shape": [
512,
1536
],
"dtype": "float32",
"size": "3.0 MB"
},
"transformer.h.3.mlp.c_fc.weight": {
"shape": [
512,
2048
],
"dtype": "float32",
"size": "4.0 MB"
},
"transformer.h.4.ln_1.weight": {
"shape": [
512
],
"dtype": "float32",
"size": "2.0 KB"
},
"transformer.h.4.attn.c_attn.weight": {
"shape": [
512,
1536
],
"dtype": "float32",
"size": "3.0 MB"
},
"transformer.h.4.mlp.c_fc.weight": {
"shape": [
512,
2048
],
"dtype": "float32",
"size": "4.0 MB"
},
"transformer.h.5.ln_1.weight": {
"shape": [
512
],
"dtype": "float32",
"size": "2.0 KB"
},
"transformer.h.5.attn.c_attn.weight": {
"shape": [
512,
1536
],
"dtype": "float32",
"size": "3.0 MB"
},
"transformer.h.5.mlp.c_fc.weight": {
"shape": [
512,
2048
],
"dtype": "float32",
"size": "4.0 MB"
},
"transformer.h.6.ln_1.weight": {
"shape": [
512
],
"dtype": "float32",
"size": "2.0 KB"
},
"transformer.h.6.attn.c_attn.weight": {
"shape": [
512,
1536
],
"dtype": "float32",
"size": "3.0 MB"
},
"transformer.h.6.mlp.c_fc.weight": {
"shape": [
512,
2048
],
"dtype": "float32",
"size": "4.0 MB"
},
"transformer.h.7.ln_1.weight": {
"shape": [
512
],
"dtype": "float32",
"size": "2.0 KB"
},
"transformer.h.7.attn.c_attn.weight": {
"shape": [
512,
1536
],
"dtype": "float32",
"size": "3.0 MB"
},
"transformer.h.7.mlp.c_fc.weight": {
"shape": [
512,
2048
],
"dtype": "float32",
"size": "4.0 MB"
},
"lm_head.weight": {
"shape": [
50257,
512
],
"dtype": "float32",
"size": "98.2 MB"
}
}
}